hankcs · callzhang · Dec 16, 2020 · Dec 17, 2020 · Dec 16, 2020 · Dec 16, 2020
diff --git a/.gitignore b/.gitignore
@@ -285,9 +285,10 @@ fabric.properties
 .idea
 *.iml
 data
+.vscode/settings.json
 .vscode
 *.pkl
 *.pdf
 _static/
 _build/
-_templates/
+_templates/
diff --git a/hanlp/common/component.py b/hanlp/common/component.py
@@ -4,7 +4,9 @@
 import inspect
 from abc import ABC, abstractmethod
 from typing import Any
-
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.mixed_precision import experimental as mixed_precision
 from hanlp_common.configurable import Configurable
 
 

diff --git a/hanlp/common/vocab.py b/hanlp/common/vocab.py
@@ -327,8 +327,8 @@ def safe_pad_token(self) -> str:
         """
         if self.pad_token:
             return self.pad_token
-        if self.first_token:
-            return self.first_token
+        # if self.first_token:
+        #     return self.first_token
         return PAD
 
     @property
@@ -345,8 +345,8 @@ def safe_unk_token(self) -> str:
         """
         if self.unk_token:
             return self.unk_token
-        if self.first_token:
-            return self.first_token
+        # if self.first_token:
+        #     return self.first_token
         return UNK
 
     def __repr__(self) -> str:

diff --git a/hanlp/layers/transformers/loader_tf.py b/hanlp/layers/transformers/loader_tf.py
@@ -12,6 +12,10 @@
 from hanlp.layers.transformers.tf_imports import zh_albert_models_google, bert_models_google
 from hanlp.utils.io_util import get_resource, stdout_redirected, hanlp_home
 
+gpu_devices = tf.config.experimental.list_physical_devices('GPU')
+for gpu in gpu_devices:
+    tf.config.experimental.set_memory_growth(gpu, True)
+
 
 def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
     spm_model_file = None

diff --git a/hanlp/transform/table.py b/hanlp/transform/table.py
@@ -28,10 +28,9 @@ def file_to_inputs(self, filepath: str, gold=True):
         y_column = self.config.y_column
         num_features = self.config.get('num_features', None)
         for cells in read_cells(filepath, skip_header=self.config.skip_header, delimiter=self.config.delimiter):
-            #multi-label: Dataset in .tsv format: x_columns: at most 2 columns being a sentence pair while in most
-            # cases just one column being the doc content. y_column being the single label, which shall be modified
-            # to load a list of labels.
-            if x_columns:
+            if type(x_columns) is int: 
+                inputs = [cells[x_columns]], cells[y_column]
+            elif type(x_columns) is list:
                 inputs = tuple(c for i, c in enumerate(cells) if i in x_columns), cells[y_column]
             else:
                 if y_column != -1:
@@ -60,7 +59,7 @@ def inputs_to_samples(self, inputs, gold=False):
             if gold:
                 yield cells
             else:
-                yield cells, pad
+                yield cells, [pad] if self.config.multi_label else pad
 
     def y_to_idx(self, y) -> tf.Tensor:
         return self.label_vocab.lookup(y)