From a8444811e890c9ddc5c44ede6c9e1bd3e783aab3 Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Wed, 16 Dec 2020 20:43:25 +0800
Subject: [PATCH 01/14] implemented multi-label support

---
 .gitignore                                    |  3 +-
 hanlp/common/transform.py                     |  2 +-
 hanlp/common/vocab.py                         |  5 +-
 .../classifiers/transformer_classifier.py     | 47 +++++++++++++++----
 hanlp/transform/table.py                      | 24 ++++++++--
 hanlp/utils/tf_util.py                        |  4 +-
 6 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/.gitignore b/.gitignore
index 76b3c5739..3c7ce29ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -284,4 +284,5 @@ fabric.properties
 .idea/caches/build_file_checksums.ser
 .idea
 *.iml
-data
\ No newline at end of file
+data
+.vscode/settings.json
diff --git a/hanlp/common/transform.py b/hanlp/common/transform.py
index 70fe81b8e..a5352d990 100644
--- a/hanlp/common/transform.py
+++ b/hanlp/common/transform.py
@@ -196,7 +196,7 @@ def mapper(X, Y):
                     Y = self.y_to_idx(Y)
                 return X, Y
 
-            dataset = dataset.map(mapper, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+dataset = dataset.map(mapper, num_parallel_calls=tf.data.experimental.AUTOTUNE)
         return dataset
 
     @abstractmethod
diff --git a/hanlp/common/vocab.py b/hanlp/common/vocab.py
index 7dec92ed0..74a0fc11f 100644
--- a/hanlp/common/vocab.py
+++ b/hanlp/common/vocab.py
@@ -79,7 +79,10 @@ def update(self, tokens: Iterable[str]) -> None:
             self.add(token)
 
     def get_idx(self, token: str) -> int:
-        idx = self.token_to_idx.get(token, None)
+        if type(token) is list:
+            idx = [self.get_idx(t) for t in token]
+        else:
+            idx = self.token_to_idx.get(token, None)
         if idx is None:
             if self.mutable:
                 idx = len(self.token_to_idx)
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 85270eae5..9f586f4b4 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -15,13 +15,14 @@
 from hanlp.transform.table import TableTransform
 from hanlp.utils.log_util import logger
 from hanlp.utils.util import merge_locals_kwargs
+import numpy as np
 
 
 class TransformerTextTransform(TableTransform):
 
     def __init__(self, config: SerializableDict = None, map_x=False, map_y=True, x_columns=None,
-                 y_column=-1, skip_header=True, delimiter='auto', **kwargs) -> None:
-        super().__init__(config, map_x, map_y, x_columns, y_column, skip_header, delimiter, **kwargs)
+                 y_column=-1, skip_header=True, delimiter='auto', multi_label=False, **kwargs) -> None:
+        super().__init__(config, map_x, map_y, x_columns, y_column, multi_label, skip_header, delimiter, **kwargs)
         self.tokenizer: FullTokenizer = None
 
     def inputs_to_samples(self, inputs, gold=False):
@@ -61,17 +62,17 @@ def inputs_to_samples(self, inputs, gold=False):
                 segment_ids += [0] * diff
 
             assert len(token_ids) == max_length, "Error with input length {} vs {}".format(len(token_ids), max_length)
-            assert len(attention_mask) == max_length, "Error with input length {} vs {}".format(len(attention_mask),
-                                                                                                max_length)
-            assert len(segment_ids) == max_length, "Error with input length {} vs {}".format(len(segment_ids),
-                                                                                             max_length)
+            assert len(attention_mask) == max_length, "Error with input length {} vs {}".format(len(attention_mask), max_length)
+            assert len(segment_ids) == max_length, "Error with input length {} vs {}".format(len(segment_ids), max_length)
+
+
             label = Y
             yield (token_ids, attention_mask, segment_ids), label
 
     def create_types_shapes_values(self) -> Tuple[Tuple, Tuple, Tuple]:
         max_length = self.config.max_length
         types = (tf.int32, tf.int32, tf.int32), tf.string
-        shapes = ([max_length], [max_length], [max_length]), []
+        shapes = ([max_length], [max_length], [max_length]), [None,] if self.config.multi_label else []
         values = (0, 0, 0), self.label_vocab.safe_pad_token
         return types, shapes, values
 
@@ -79,8 +80,22 @@ def x_to_idx(self, x) -> Union[tf.Tensor, Tuple]:
         logger.fatal('map_x should always be set to True')
         exit(1)
 
+    def y_to_idx(self, y) -> tf.Tensor:
+        if self.config.multi_label:
+            #need to change index to binary vector
+            mapped = tf.map_fn(fn=lambda x: tf.cast(self.label_vocab.lookup(x), tf.int32), elems=y, fn_output_signature=tf.TensorSpec(dtype=tf.dtypes.int32, shape=[None,]))
+            one_hots = tf.one_hot(mapped, len(self.label_vocab))
+            idx = tf.reduce_sum(one_hots, -2)
+        else:
+            idx = self.label_vocab.lookup(y)
+        return idx
+
     def Y_to_outputs(self, Y: Union[tf.Tensor, Tuple[tf.Tensor]], gold=False, inputs=None, X=None, batch=None) -> Iterable:
-        preds = tf.argmax(Y, axis=-1)
+        # Prediction to be Y > 0:
+        if self.config.multi_label:
+            preds = Y
+        else:
+            preds = tf.argmax(Y, axis=-1)
         for y in preds:
             yield self.label_vocab.idx_to_token[y]
 
@@ -126,7 +141,14 @@ def _y_id_to_str(self, Y_pred) -> str:
         return self.transform.label_vocab.idx_to_token[Y_pred.numpy()]
 
     def build_loss(self, loss, **kwargs):
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+        if loss:
+            assert isinstance(loss, tf.keras.losses.loss), 'Must specify loss as an instance in tf.keras.losses'
+            return loss
+        elif self.config.multi_label:
+        #Loss to be BinaryCrossentropy for multi-label:
+            loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
+        else:
+            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
         return loss
 
     # noinspection PyMethodOverriding
@@ -158,3 +180,10 @@ def build_vocab(self, trn_data, logger):
         warmup_steps_per_epoch = math.ceil(train_examples * self.config.warmup_steps_ratio / self.config.batch_size)
         self.config.warmup_steps = warmup_steps_per_epoch * self.config.epochs
         return train_examples
+
+    def build_metrics(self, metrics, logger, **kwargs):
+        if self.config.multi_label:
+            metric = tf.keras.metrics.BinaryCrossentropy()
+        else:
+            metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
+        return [metric]x
\ No newline at end of file
diff --git a/hanlp/transform/table.py b/hanlp/transform/table.py
index 046be98f0..ad95fd8f7 100644
--- a/hanlp/transform/table.py
+++ b/hanlp/transform/table.py
@@ -3,7 +3,7 @@
 # Date: 2019-11-10 21:00
 from abc import ABC
 from typing import Tuple, Union
-
+import numpy as np
 import tensorflow as tf
 
 from hanlp.common.structure import SerializableDict
@@ -16,9 +16,9 @@
 
 class TableTransform(Transform, ABC):
     def __init__(self, config: SerializableDict = None, map_x=False, map_y=True, x_columns=None,
-                 y_column=-1,
+                 y_column=-1, multi_label=False,
                  skip_header=True, delimiter='auto', **kwargs) -> None:
-        super().__init__(config, map_x, map_y, x_columns=x_columns, y_column=y_column,
+        super().__init__(config, map_x, map_y, x_columns=x_columns, y_column=y_column, multi_label=multi_label,
                          skip_header=skip_header,
                          delimiter=delimiter, **kwargs)
         self.label_vocab = create_label_vocab()
@@ -28,6 +28,9 @@ def file_to_inputs(self, filepath: str, gold=True):
         y_column = self.config.y_column
         num_features = self.config.get('num_features', None)
         for cells in read_cells(filepath, skip_header=self.config.skip_header, delimiter=self.config.delimiter):
+            #multi-label: Dataset in .tsv format: x_columns: at most 2 columns being a sentence pair while in most 
+            # cases just one column being the doc content. y_column being the single label, which shall be modified 
+            # to load a list of labels.
             if x_columns:
                 inputs = tuple(c for i, c in enumerate(cells) if i in x_columns), cells[y_column]
             else:
@@ -37,6 +40,15 @@ def file_to_inputs(self, filepath: str, gold=True):
             if num_features is None:
                 num_features = len(inputs[0])
                 self.config.num_features = num_features
+            # multi-label support
+            if self.config.multi_label:
+                assert type(inputs[1]) is str, 'Y value has to be string'
+                if inputs[1][0] == '[':
+                    # multi-label is in literal form of a list
+                    labels = eval(inputs[1])
+                else:
+                    labels = inputs[1].strip().split(',')
+                inputs = inputs[0], labels
             else:
                 assert num_features == len(inputs[0]), f'Numbers of columns {num_features} ' \
                                                        f'inconsistent with current {len(inputs[0])}'
@@ -56,7 +68,11 @@ def y_to_idx(self, y) -> tf.Tensor:
     def fit(self, trn_path: str, **kwargs):
         samples = 0
         for t in self.file_to_samples(trn_path, gold=True):
-            self.label_vocab.add(t[1])  # the second one regardless of t is pair or triple
+            if self.config.multi_label:
+                for l in t[1]:
+                    self.label_vocab.add(l)
+            else:
+                self.label_vocab.add(t[1])  # the second one regardless of t is pair or triple
             samples += 1
         return samples
 
diff --git a/hanlp/utils/tf_util.py b/hanlp/utils/tf_util.py
index 465856cc7..1ea040ea9 100644
--- a/hanlp/utils/tf_util.py
+++ b/hanlp/utils/tf_util.py
@@ -11,9 +11,7 @@
 
 
 def size_of_dataset(dataset: tf.data.Dataset) -> int:
-    count = 0
-    for element in dataset.unbatch().batch(1):
-        count += 1
+    count = len(list(dataset.unbatch().as_numpy_iterator()))
     return count
 
 

From 62f7b3da720e7e517c44f01e49594ac065ed79a8 Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Thu, 17 Dec 2020 13:27:21 +0800
Subject: [PATCH 02/14] multi-label support cherry picked to master

---
 hanlp/components/classifiers/transformer_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 9f586f4b4..60ec82cbe 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -183,7 +183,7 @@ def build_vocab(self, trn_data, logger):
 
     def build_metrics(self, metrics, logger, **kwargs):
         if self.config.multi_label:
-            metric = tf.keras.metrics.BinaryCrossentropy()
+            metric = tf.keras.metrics.BinaryAccuracy('binary_accuracy')
         else:
             metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
         return [metric]x
\ No newline at end of file

From d4104d7d57e1e08e24c659165422e43caaf017a9 Mon Sep 17 00:00:00 2001
From: Derek Zhang <leizhang0121@gmail.com>
Date: Wed, 16 Dec 2020 21:49:57 +0800
Subject: [PATCH 03/14] minor fix

---
 hanlp/common/transform.py                              | 2 +-
 hanlp/components/classifiers/transformer_classifier.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hanlp/common/transform.py b/hanlp/common/transform.py
index a5352d990..70fe81b8e 100644
--- a/hanlp/common/transform.py
+++ b/hanlp/common/transform.py
@@ -196,7 +196,7 @@ def mapper(X, Y):
                     Y = self.y_to_idx(Y)
                 return X, Y
 
-dataset = dataset.map(mapper, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+            dataset = dataset.map(mapper, num_parallel_calls=tf.data.experimental.AUTOTUNE)
         return dataset
 
     @abstractmethod
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 60ec82cbe..a70d9b65a 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -186,4 +186,4 @@ def build_metrics(self, metrics, logger, **kwargs):
             metric = tf.keras.metrics.BinaryAccuracy('binary_accuracy')
         else:
             metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
-        return [metric]x
\ No newline at end of file
+        return [metric]
\ No newline at end of file

From 91e9847dd941fcb728a2e031fc7d8beff30f73fd Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Thu, 17 Dec 2020 01:31:15 +0800
Subject: [PATCH 04/14] minor fix

---
 hanlp/common/component.py           | 8 +++++++-
 hanlp/layers/transformers/loader.py | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 44fd8408c..5a347a5c7 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -10,7 +10,7 @@
 
 import numpy as np
 import tensorflow as tf
-
+from tensorflow.keras.mixed_precision import experimental as mixed_precision
 import hanlp
 import hanlp.version
 from hanlp.callbacks.fine_csv_logger import FineCSVLogger
@@ -331,6 +331,12 @@ def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=Fals
         logger.info('Building...')
         train_steps_per_epoch = math.ceil(num_examples / batch_size) if num_examples else None
         self.config.train_steps = train_steps_per_epoch * epochs if num_examples else None
+        # mixed precision
+        if self.config.use_amp:
+            policy = mixed_precision.Policy('mixed_float16')
+            mixed_precision.set_policy(policy)
+            print('Compute dtype: %s' % policy.compute_dtype)
+            print('Variable dtype: %s' % policy.variable_dtype)
         model, optimizer, loss, metrics = self.build(**merge_dict(self.config, logger=logger, training=True))
         logger.info('Model built:\n' + summary_of_model(self.model))
         self.save_config(save_dir)
diff --git a/hanlp/layers/transformers/loader.py b/hanlp/layers/transformers/loader.py
index 8cea1c08c..53ca44e91 100644
--- a/hanlp/layers/transformers/loader.py
+++ b/hanlp/layers/transformers/loader.py
@@ -12,6 +12,9 @@
 from hanlp.layers.transformers import zh_albert_models_google, bert_models_google
 from hanlp.utils.io_util import get_resource, stdout_redirected, hanlp_home
 
+gpu_devices = tf.config.experimental.list_physical_devices('GPU')
+if len(gpu_devices)>0:
+    tf.config.experimental.set_memory_growth(gpu_devices[0], True)
 
 def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
     spm_model_file = None

From 7bae452a112f5ea9f36acb527957d832d9a1b115 Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Thu, 17 Dec 2020 14:13:30 +0800
Subject: [PATCH 05/14] Revert "minor fix"

This reverts commit 91e9847dd941fcb728a2e031fc7d8beff30f73fd.

 On branch master
 Your branch is up to date with 'origin/master'.

 Changes to be committed:
	modified:   hanlp/common/component.py
	modified:   hanlp/layers/transformers/loader.py
---
 hanlp/common/component.py           | 8 +-------
 hanlp/layers/transformers/loader.py | 3 ---
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 5a347a5c7..44fd8408c 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -10,7 +10,7 @@
 
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.mixed_precision import experimental as mixed_precision
+
 import hanlp
 import hanlp.version
 from hanlp.callbacks.fine_csv_logger import FineCSVLogger
@@ -331,12 +331,6 @@ def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=Fals
         logger.info('Building...')
         train_steps_per_epoch = math.ceil(num_examples / batch_size) if num_examples else None
         self.config.train_steps = train_steps_per_epoch * epochs if num_examples else None
-        # mixed precision
-        if self.config.use_amp:
-            policy = mixed_precision.Policy('mixed_float16')
-            mixed_precision.set_policy(policy)
-            print('Compute dtype: %s' % policy.compute_dtype)
-            print('Variable dtype: %s' % policy.variable_dtype)
         model, optimizer, loss, metrics = self.build(**merge_dict(self.config, logger=logger, training=True))
         logger.info('Model built:\n' + summary_of_model(self.model))
         self.save_config(save_dir)
diff --git a/hanlp/layers/transformers/loader.py b/hanlp/layers/transformers/loader.py
index 53ca44e91..8cea1c08c 100644
--- a/hanlp/layers/transformers/loader.py
+++ b/hanlp/layers/transformers/loader.py
@@ -12,9 +12,6 @@
 from hanlp.layers.transformers import zh_albert_models_google, bert_models_google
 from hanlp.utils.io_util import get_resource, stdout_redirected, hanlp_home
 
-gpu_devices = tf.config.experimental.list_physical_devices('GPU')
-if len(gpu_devices)>0:
-    tf.config.experimental.set_memory_growth(gpu_devices[0], True)
 
 def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
     spm_model_file = None

From 4a0dadc5bfb17ca0d90a703ba7ad3dc07319c910 Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Sat, 19 Dec 2020 01:49:29 +0800
Subject: [PATCH 06/14] fixed safe pad issue for multi-label

---
 hanlp/common/vocab.py                         | 10 ++---
 .../classifiers/transformer_classifier.py     | 39 ++++++++++++-------
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/hanlp/common/vocab.py b/hanlp/common/vocab.py
index 74a0fc11f..f242174b7 100644
--- a/hanlp/common/vocab.py
+++ b/hanlp/common/vocab.py
@@ -30,7 +30,7 @@ def __init__(self, idx_to_token: List[str] = None, token_to_idx: Dict = None, mu
         self.pad_token = pad_token
         self.unk_token = unk_token
         self.token_to_idx_table: tf.lookup.StaticHashTable = None
-        self.idx_to_token_table = None
+        # self.idx_to_token_table = None
 
     def __setitem__(self, token: str, idx: int):
         assert self.mutable, 'Update an immutable Vocab object is not allowed'
@@ -239,8 +239,8 @@ def safe_pad_token(self) -> str:
         """
         if self.pad_token:
             return self.pad_token
-        if self.first_token:
-            return self.first_token
+        # if self.first_token:
+        #     return self.first_token
         return PAD
 
     @property
@@ -259,8 +259,8 @@ def safe_unk_token(self) -> str:
         """
         if self.unk_token:
             return self.unk_token
-        if self.first_token:
-            return self.first_token
+        # if self.first_token:
+        #     return self.first_token
         return UNK
 
 
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index a70d9b65a..74477ac6d 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -82,9 +82,9 @@ def x_to_idx(self, x) -> Union[tf.Tensor, Tuple]:
 
     def y_to_idx(self, y) -> tf.Tensor:
         if self.config.multi_label:
-            #need to change index to binary vector
+            #converrt index to binary vector
             mapped = tf.map_fn(fn=lambda x: tf.cast(self.label_vocab.lookup(x), tf.int32), elems=y, fn_output_signature=tf.TensorSpec(dtype=tf.dtypes.int32, shape=[None,]))
-            one_hots = tf.one_hot(mapped, len(self.label_vocab))
+            one_hots = tf.one_hot(mapped, len(self.label_vocab), on_value=1, off_value=0)
             idx = tf.reduce_sum(one_hots, -2)
         else:
             idx = self.label_vocab.lookup(y)
@@ -93,11 +93,13 @@ def y_to_idx(self, y) -> tf.Tensor:
     def Y_to_outputs(self, Y: Union[tf.Tensor, Tuple[tf.Tensor]], gold=False, inputs=None, X=None, batch=None) -> Iterable:
         # Prediction to be Y > 0:
         if self.config.multi_label:
-            preds = Y
+            preds = [np.flatnonzero(y>0) for y in Y]
+            for p in preds:
+                yield [self.label_vocab.idx_to_token[i] for i in p]
         else:
             preds = tf.argmax(Y, axis=-1)
-        for y in preds:
-            yield self.label_vocab.idx_to_token[y]
+            for y in preds:
+                yield self.label_vocab.idx_to_token[y]
 
     def input_is_single_sample(self, input: Any) -> bool:
         return isinstance(input, (str, tuple))
@@ -122,23 +124,29 @@ def evaluate_output(self, tst_data, out, num_batches, metric):
         out.write('sentence\tpred\tgold\n')
         total, correct, score = 0, 0, 0
         for idx, batch in enumerate(tst_data):
-            outputs = self.model.predict_on_batch(batch[0])[0]
-            outputs = tf.argmax(outputs, axis=1)
-            for X, Y_pred, Y_gold, in zip(batch[0][0], outputs, batch[1]):
-                feature = ' '.join(self.transform.tokenizer.convert_ids_to_tokens(X.numpy(), skip_special_tokens=True))
+            outputs = self.model.predict_on_batch(batch[0])
+            tokens = self.transform.Y_to_outputs(outputs)
+            Y_GT = self.transform.Y_to_outputs(batch[1])
+            for X, Y_pred, Y_gold, in zip(batch[0][0], tokens, Y_GT):#batch[1]):
+                feature = ' '.join(self.transform.tokenizer.convert_ids_to_tokens(X.numpy()))#, skip_special_tokens=True))
                 feature = feature.replace(' ##', '')  # fix sub-word generated by BERT tagger
-                out.write('{}\t{}\t{}\n'.format(feature,
-                                                self._y_id_to_str(Y_pred),
-                                                self._y_id_to_str(Y_gold)))
+                # Y_gold = self.transform.label_vocab.idx_to_token[Y_gold]
+                out.write('{}\t{}\t{}\n'.format(feature, Y_pred, Y_gold))
                 total += 1
-                correct += int(tf.equal(Y_pred, Y_gold).numpy())
+                correct += sum([1 for y1 in Y_gold for y2 in Y_pred if y1==y2])/len(Y_gold) if self.config.multi_label else int(Y_pred == Y_gold)
             score = correct / total
             print('\r{}/{} {}: {:.2f}'.format(idx + 1, num_batches, metric, score * 100), end='')
         print()
         return score
 
-    def _y_id_to_str(self, Y_pred) -> str:
-        return self.transform.label_vocab.idx_to_token[Y_pred.numpy()]
+    # def _y_id_to_str(self, Y_pred) -> str:
+    #     logger.info(f'start to produce Y_pred: {Y_pred}')
+    #     if self.config.multi_label:
+    #         Y_pred = np.flatnonzero(Y_pred>0)
+    #         return [self.transform.label_vocab.idx_to_token[y.numpy()] for y in Y_pred]
+    #     else:
+    #         Y_pred = tf.argmax(Y_pred, axis=1)
+    #         return self.transform.label_vocab.idx_to_token[Y_pred.numpy()]
 
     def build_loss(self, loss, **kwargs):
         if loss:
@@ -176,6 +184,7 @@ def build_model(self, transformer, max_length, **kwargs):
         return model
 
     def build_vocab(self, trn_data, logger):
+        self.transform.label_vocab.unlock()
         train_examples = super().build_vocab(trn_data, logger)
         warmup_steps_per_epoch = math.ceil(train_examples * self.config.warmup_steps_ratio / self.config.batch_size)
         self.config.warmup_steps = warmup_steps_per_epoch * self.config.epochs

From a3ff10ab2952ff108d0d559aa56677a6db1d2549 Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Sat, 19 Dec 2020 23:17:34 +0800
Subject: [PATCH 07/14] fixed loss and metrics for multi-label

---
 hanlp/common/component.py                      | 18 +++++++++++++-----
 .../classifiers/transformer_classifier.py      | 14 ++++++++++----
 hanlp/layers/transformers/loader.py            |  4 ++++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 44fd8408c..55fd57438 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -10,6 +10,7 @@
 
 import numpy as np
 import tensorflow as tf
+from tensorflow.keras.mixed_precision import experimental as mixed_precision
 
 import hanlp
 import hanlp.version
@@ -326,6 +327,13 @@ def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=Fals
         if not logger:
             logger = init_logger(name='train', root_dir=save_dir, level=logging.INFO if verbose else logging.WARN)
         logger.info('Hyperparameter:\n' + self.config.to_json())
+        if self.config.use_amp:
+            policy = mixed_precision.Policy('mixed_float16')
+            mixed_precision.set_policy(policy)
+            logger.info(f'Global mixed precision policy has been set.')
+            logger.info('Compute dtype: %s' % policy.compute_dtype)
+            logger.info('Variable dtype: %s' % policy.variable_dtype)
+
         num_examples = self.build_vocab(trn_data, logger)
         # assert num_examples, 'You forgot to return the number of training examples in your build_vocab'
         logger.info('Building...')
@@ -397,16 +405,16 @@ def build_train_dataset(self, trn_data, batch_size, num_examples):
         return trn_data
 
     def build_callbacks(self, save_dir, logger, **kwargs):
-        metrics = kwargs.get('metrics', 'accuracy')
-        if isinstance(metrics, (list, tuple)):
-            metrics = metrics[-1]
-        monitor = f'val_{metrics}'
+        metrics_names = [m.name for m in kwargs.get('metrics', 'accuracy')]
+        if isinstance(metrics_names, (list, tuple)):
+            metrics_names = metrics_names[-1]
+        monitor = f'val_{metrics_names}'
         checkpoint = tf.keras.callbacks.ModelCheckpoint(
             os.path.join(save_dir, 'model.h5'),
             # verbose=1,
             monitor=monitor, save_best_only=True,
             mode='max',
-            save_weights_only=True)
+            save_weights_only=False)
         logger.debug(f'Monitor {checkpoint.monitor} for checkpoint')
         tensorboard_callback = tf.keras.callbacks.TensorBoard(
             log_dir=io_util.makedirs(io_util.path_join(save_dir, 'logs')))
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 74477ac6d..7c6407905 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -133,7 +133,7 @@ def evaluate_output(self, tst_data, out, num_batches, metric):
                 # Y_gold = self.transform.label_vocab.idx_to_token[Y_gold]
                 out.write('{}\t{}\t{}\n'.format(feature, Y_pred, Y_gold))
                 total += 1
-                correct += sum([1 for y1 in Y_gold for y2 in Y_pred if y1==y2])/len(Y_gold) if self.config.multi_label else int(Y_pred == Y_gold)
+                correct += sum([1 for y1 in Y_gold for y2 in Y_pred if y1==y2])/max(len(Y_pred),len(Y_gold)) if self.config.multi_label else int(Y_pred == Y_gold)
             score = correct / total
             print('\r{}/{} {}: {:.2f}'.format(idx + 1, num_batches, metric, score * 100), end='')
         print()
@@ -150,8 +150,9 @@ def evaluate_output(self, tst_data, out, num_batches, metric):
 
     def build_loss(self, loss, **kwargs):
         if loss:
-            assert isinstance(loss, tf.keras.losses.loss), 'Must specify loss as an instance in tf.keras.losses'
-            return loss
+            # assert isinstance(loss, tf.keras.losses.Loss), 'Must specify loss as an instance in tf.keras.losses.Loss'
+            if not isinstance(loss, tf.keras.losses.Loss): 
+                logger.warn(f'loss function may not be compatible: {loss}')
         elif self.config.multi_label:
         #Loss to be BinaryCrossentropy for multi-label:
             loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
@@ -191,8 +192,13 @@ def build_vocab(self, trn_data, logger):
         return train_examples
 
     def build_metrics(self, metrics, logger, **kwargs):
+        if metrics:
+            for metric in metrics:
+                assert isinstance(metric, tf.keras.metrics.Metric), f'Metrics defined may not be compatible: {metric}'
+            return metrics
         if self.config.multi_label:
             metric = tf.keras.metrics.BinaryAccuracy('binary_accuracy')
         else:
             metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
-        return [metric]
\ No newline at end of file
+        self.config['metrics'] = [metric]
+        return [metrics]
\ No newline at end of file
diff --git a/hanlp/layers/transformers/loader.py b/hanlp/layers/transformers/loader.py
index 8cea1c08c..2d60d28ee 100644
--- a/hanlp/layers/transformers/loader.py
+++ b/hanlp/layers/transformers/loader.py
@@ -12,6 +12,10 @@
 from hanlp.layers.transformers import zh_albert_models_google, bert_models_google
 from hanlp.utils.io_util import get_resource, stdout_redirected, hanlp_home
 
+gpu_devices = tf.config.experimental.list_physical_devices('GPU')
+for gpu in gpu_devices:
+    tf.config.experimental.set_memory_growth(gpu, True)
+
 
 def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
     spm_model_file = None

From c59fedccb19ec6c43f19bd74332f1ee55e00c1ec Mon Sep 17 00:00:00 2001
From: Derek Zhang <Leizhang0121@gmail.com>
Date: Sun, 20 Dec 2020 00:54:59 +0800
Subject: [PATCH 08/14] fix metrics passing

---
 hanlp/common/component.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 55fd57438..ee52866cf 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -259,7 +259,7 @@ def build(self, logger, **kwargs):
         loss = self.build_loss(
             **self.config if 'loss' in self.config else dict(list(self.config.items()) + [('loss', None)]))
         # allow for different
-        metrics = self.build_metrics(**merge_dict(self.config, metrics=kwargs.get('metrics', 'accuracy'),
+        metrics = self.build_metrics(**merge_dict(self.config, metrics=kwargs.get('metrics', None),
                                                   logger=logger, overwrite=True))
         if not isinstance(metrics, list):
             if isinstance(metrics, tf.keras.metrics.Metric):

From 6e71838be6af3f8fadc208a229154e1a2ed1377f Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Sun, 20 Dec 2020 03:01:00 +0800
Subject: [PATCH 09/14] metrics fix

---
 hanlp/common/component.py                            | 12 +++++++-----
 .../components/classifiers/transformer_classifier.py |  5 +++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 55fd57438..95fedee9b 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -259,7 +259,7 @@ def build(self, logger, **kwargs):
         loss = self.build_loss(
             **self.config if 'loss' in self.config else dict(list(self.config.items()) + [('loss', None)]))
         # allow for different
-        metrics = self.build_metrics(**merge_dict(self.config, metrics=kwargs.get('metrics', 'accuracy'),
+        metrics = self.build_metrics(**merge_dict(self.config, metrics=kwargs.get('metrics', None),
                                                   logger=logger, overwrite=True))
         if not isinstance(metrics, list):
             if isinstance(metrics, tf.keras.metrics.Metric):
@@ -346,7 +346,7 @@ def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=Fals
         self.save_meta(save_dir)
         trn_data = self.build_train_dataset(trn_data, batch_size, num_examples)
         dev_data = self.build_valid_dataset(dev_data, batch_size)
-        callbacks = self.build_callbacks(save_dir, **merge_dict(self.config, overwrite=True, logger=logger))
+        callbacks = self.build_callbacks(save_dir, **merge_dict(self.config, overwrite=True, logger=logger, metrics=metrics))
         # need to know #batches, otherwise progbar crashes
         dev_steps = math.ceil(size_of_dataset(dev_data) / batch_size)
         checkpoint = get_callback_by_class(callbacks, tf.keras.callbacks.ModelCheckpoint)
@@ -405,9 +405,11 @@ def build_train_dataset(self, trn_data, batch_size, num_examples):
         return trn_data
 
     def build_callbacks(self, save_dir, logger, **kwargs):
-        metrics_names = [m.name for m in kwargs.get('metrics', 'accuracy')]
-        if isinstance(metrics_names, (list, tuple)):
-            metrics_names = metrics_names[-1]
+        metrics = kwargs.get('metrics', 'accuracy')
+        if isinstance(metrics, str):
+            metrics_names = metrics
+        else:
+            metrics_names = [m.name for m in metrics][-1]
         monitor = f'val_{metrics_names}'
         checkpoint = tf.keras.callbacks.ModelCheckpoint(
             os.path.join(save_dir, 'model.h5'),
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 7c6407905..6746e54f6 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -194,11 +194,12 @@ def build_vocab(self, trn_data, logger):
     def build_metrics(self, metrics, logger, **kwargs):
         if metrics:
             for metric in metrics:
-                assert isinstance(metric, tf.keras.metrics.Metric), f'Metrics defined may not be compatible: {metric}'
+                # assert isinstance(metric, tf.keras.metrics.Metric), f'Metrics defined may not be compatible: {metric}'
+                if not isinstance(metric, tf.keras.metrics.Metric): logger.warn(f'metric may not be compatible: {metric}')
             return metrics
         if self.config.multi_label:
             metric = tf.keras.metrics.BinaryAccuracy('binary_accuracy')
         else:
             metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
         self.config['metrics'] = [metric]
-        return [metrics]
\ No newline at end of file
+        return [metric]
\ No newline at end of file

From 825a300edf9721d803c63d4b98dd17f6071f9626 Mon Sep 17 00:00:00 2001
From: Derek Zhang <Leizhang0121@gmail.com>
Date: Sun, 20 Dec 2020 20:28:41 +0800
Subject: [PATCH 10/14] fixed metrics on loading

---
 hanlp/common/component.py                         | 15 ++++++++-------
 .../classifiers/transformer_classifier.py         |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 95fedee9b..6ab4409a4 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -236,6 +236,7 @@ def save(self, save_dir: str, **kwargs):
         self.save_config(save_dir)
         self.save_vocabs(save_dir)
         self.save_weights(save_dir)
+        self.model.save(save_dir)
 
     def load(self, save_dir: str, logger=hanlp.utils.log_util.logger, **kwargs):
         self.meta['load_path'] = save_dir
@@ -244,6 +245,7 @@ def load(self, save_dir: str, logger=hanlp.utils.log_util.logger, **kwargs):
         self.load_vocabs(save_dir)
         self.build(**merge_dict(self.config, training=False, logger=logger, **kwargs, overwrite=True, inplace=True))
         self.load_weights(save_dir, **kwargs)
+        # tf.keras.models.load_model(save_dir)
         self.load_meta(save_dir)
 
     @property
@@ -341,9 +343,7 @@ def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=Fals
         self.config.train_steps = train_steps_per_epoch * epochs if num_examples else None
         model, optimizer, loss, metrics = self.build(**merge_dict(self.config, logger=logger, training=True))
         logger.info('Model built:\n' + summary_of_model(self.model))
-        self.save_config(save_dir)
-        self.save_vocabs(save_dir)
-        self.save_meta(save_dir)
+        self.save(save_dir)
         trn_data = self.build_train_dataset(trn_data, batch_size, num_examples)
         dev_data = self.build_valid_dataset(dev_data, batch_size)
         callbacks = self.build_callbacks(save_dir, **merge_dict(self.config, overwrite=True, logger=logger, metrics=metrics))
@@ -361,7 +361,8 @@ def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=Fals
         except KeyboardInterrupt:
             print()
             if not checkpoint or checkpoint.best in (np.Inf, -np.Inf):
-                self.save_weights(save_dir)
+                # self.save_weights(save_dir)
+                self.save(save_dir)
                 logger.info('Aborted with model saved')
             else:
                 logger.info(f'Aborted with model saved with best {checkpoint.monitor} = {checkpoint.best:.4f}')
@@ -413,10 +414,10 @@ def build_callbacks(self, save_dir, logger, **kwargs):
         monitor = f'val_{metrics_names}'
         checkpoint = tf.keras.callbacks.ModelCheckpoint(
             os.path.join(save_dir, 'model.h5'),
-            # verbose=1,
+            verbose=1,
             monitor=monitor, save_best_only=True,
-            mode='max',
-            save_weights_only=False)
+            mode='auto',
+            save_weights_only=True)
         logger.debug(f'Monitor {checkpoint.monitor} for checkpoint')
         tensorboard_callback = tf.keras.callbacks.TensorBoard(
             log_dir=io_util.makedirs(io_util.path_join(save_dir, 'logs')))
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 6746e54f6..ed1dc61c5 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -192,7 +192,7 @@ def build_vocab(self, trn_data, logger):
         return train_examples
 
     def build_metrics(self, metrics, logger, **kwargs):
-        if metrics:
+        if metrics and type(metrics[0]) is not str:
             for metric in metrics:
                 # assert isinstance(metric, tf.keras.metrics.Metric), f'Metrics defined may not be compatible: {metric}'
                 if not isinstance(metric, tf.keras.metrics.Metric): logger.warn(f'metric may not be compatible: {metric}')

From 35858918baf4c300e5cd421ce249addf75a8280c Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Sun, 20 Dec 2020 20:29:40 +0800
Subject: [PATCH 11/14] fix evaluation

---
 .../classifiers/transformer_classifier.py          | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 6746e54f6..b52b36437 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -126,15 +126,15 @@ def evaluate_output(self, tst_data, out, num_batches, metric):
         for idx, batch in enumerate(tst_data):
             outputs = self.model.predict_on_batch(batch[0])
             tokens = self.transform.Y_to_outputs(outputs)
-            Y_GT = self.transform.Y_to_outputs(batch[1])
-            for X, Y_pred, Y_gold, in zip(batch[0][0], tokens, Y_GT):#batch[1]):
-                feature = ' '.join(self.transform.tokenizer.convert_ids_to_tokens(X.numpy()))#, skip_special_tokens=True))
+            Y_gold = self.transform.Y_to_outputs(batch[1]) if self.config.multi_label else batch[1]
+            for x, y_pred, y_gold, in zip(batch[0][0], tokens, Y_gold):#batch[1]):
+                feature = ''.join(self.transform.tokenizer.convert_ids_to_tokens(x.numpy()))#, skip_special_tokens=True))
                 feature = feature.replace(' ##', '')  # fix sub-word generated by BERT tagger
                 # Y_gold = self.transform.label_vocab.idx_to_token[Y_gold]
-                out.write('{}\t{}\t{}\n'.format(feature, Y_pred, Y_gold))
-                total += 1
-                correct += sum([1 for y1 in Y_gold for y2 in Y_pred if y1==y2])/max(len(Y_pred),len(Y_gold)) if self.config.multi_label else int(Y_pred == Y_gold)
-            score = correct / total
+                out.write('{}\t{}\t{}\n'.format(feature, y_pred, y_gold))
+                # total += 1
+                # correct += sum([1 for y1 in y_gold for y2 in y_pred if y1==y2])/max(len(y_pred),len(y_gold)) if self.config.multi_label else int(y_pred == y_gold)
+            score = metric[-1](Y_gold, list(tokens))
             print('\r{}/{} {}: {:.2f}'.format(idx + 1, num_batches, metric, score * 100), end='')
         print()
         return score

From ff092adb7cadf9c2b3b2b3c15d715f2fa7a0affb Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Mon, 21 Dec 2020 16:24:19 +0800
Subject: [PATCH 12/14] fixed evaluation output on classification

---
 hanlp/common/component.py                     |  4 +-
 .../classifiers/transformer_classifier.py     | 52 +++++++++----------
 hanlp/transform/table.py                      |  4 +-
 3 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index 6ab4409a4..df951f7b0 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -126,9 +126,7 @@ def evaluate(self, input_path: str, save_dir=None, output=False, batch_size=128,
                                 format_scores(score) if isinstance(score, dict) else format_metrics(self.model.metrics),
                                 speed, extra_report))
         if output:
-            logger.info('Saving output to {}'.format(output))
-            with open(output, 'w', encoding='utf-8') as out:
-                self.evaluate_output(tst_data, out, num_batches, self.model.metrics)
+            self.evaluate_output(tst_data, output, num_batches, self.model.metrics)
 
         return loss, score, speed
 
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index daba371c6..7bb08e1ef 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -2,7 +2,7 @@
 # Author: hankcs
 # Date: 2019-11-10 13:19
 
-import math
+import math, re
 from typing import Union, Tuple, List, Any, Iterable
 
 import tensorflow as tf
@@ -120,34 +120,32 @@ def fit(self, trn_data: Any, dev_data: Any, save_dir: str, transformer: str, max
             epochs=3, logger=None, verbose=1, **kwargs):
         return super().fit(**merge_locals_kwargs(locals(), kwargs))
 
-    def evaluate_output(self, tst_data, out, num_batches, metric):
-        out.write('sentence\tpred\tgold\n')
-        total, correct, score = 0, 0, 0
-        for idx, batch in enumerate(tst_data):
-            outputs = self.model.predict_on_batch(batch[0])
-            tokens = self.transform.Y_to_outputs(outputs)
-            Y_gold = self.transform.Y_to_outputs(batch[1]) if self.config.multi_label else batch[1]
-            for x, y_pred, y_gold, in zip(batch[0][0], tokens, Y_gold):#batch[1]):
-                feature = ''.join(self.transform.tokenizer.convert_ids_to_tokens(x.numpy()))#, skip_special_tokens=True))
-                feature = feature.replace(' ##', '')  # fix sub-word generated by BERT tagger
-                # Y_gold = self.transform.label_vocab.idx_to_token[Y_gold]
-                out.write('{}\t{}\t{}\n'.format(feature, y_pred, y_gold))
-                # total += 1
-                # correct += sum([1 for y1 in y_gold for y2 in y_pred if y1==y2])/max(len(y_pred),len(y_gold)) if self.config.multi_label else int(y_pred == y_gold)
-            score = metric[-1](Y_gold, list(tokens))
-            print('\r{}/{} {}: {:.2f}'.format(idx + 1, num_batches, metric, score * 100), end='')
-        print()
+    def evaluate_output(self, tst_data, output, num_batches, metrics):
+        metric = metrics[-1]
+        try:
+            metric_name = metric.name
+        except:
+            metric_name = 'accuracy'
+        logger.info('Saving output to {}'.format(output))
+        with open(output, 'w', encoding='utf-8') as out:
+            out.write('sentence\tpred\tgold\n')
+            total, correct, score = 0, 0, 0
+            for idx, batch in enumerate(tst_data):
+                Y_pred = self.model.predict_on_batch(batch[0])
+                for x, y_pred, y_gold, in zip(batch[0][0], Y_pred, batch[1]):
+                    feature = ''.join(self.transform.tokenizer.convert_ids_to_tokens(x.numpy()))#, skip_special_tokens=True))
+                    feature = feature.replace('[CLS]', '')
+                    feature = feature.replace('[PAD]', '')
+                    feature = feature.replace(' ##', '')  # fix sub-word generated by BERT tagger
+                    y_pred_str = self.transform.Y_to_outputs([y_pred])
+                    y_gold_str = self.transform.Y_to_outputs([y_gold])
+                    out.write('{}\t{}\t{}\n'.format(feature, y_pred_str, y_gold_str))
+                    total += 1
+                    correct += metric(y_gold, y_pred)
+                score = correct/total
+                logger.info(f'{idx + 1}/{num_batches} {metric_name}: {score * 100}')
         return score
 
-    # def _y_id_to_str(self, Y_pred) -> str:
-    #     logger.info(f'start to produce Y_pred: {Y_pred}')
-    #     if self.config.multi_label:
-    #         Y_pred = np.flatnonzero(Y_pred>0)
-    #         return [self.transform.label_vocab.idx_to_token[y.numpy()] for y in Y_pred]
-    #     else:
-    #         Y_pred = tf.argmax(Y_pred, axis=1)
-    #         return self.transform.label_vocab.idx_to_token[Y_pred.numpy()]
-
     def build_loss(self, loss, **kwargs):
         if loss:
             # assert isinstance(loss, tf.keras.losses.Loss), 'Must specify loss as an instance in tf.keras.losses.Loss'
diff --git a/hanlp/transform/table.py b/hanlp/transform/table.py
index ad95fd8f7..f86ef5316 100644
--- a/hanlp/transform/table.py
+++ b/hanlp/transform/table.py
@@ -31,7 +31,9 @@ def file_to_inputs(self, filepath: str, gold=True):
             #multi-label: Dataset in .tsv format: x_columns: at most 2 columns being a sentence pair while in most 
             # cases just one column being the doc content. y_column being the single label, which shall be modified 
             # to load a list of labels.
-            if x_columns:
+            if type(x_columns) is int:
+                inputs = [cells[x_columns]], cells[y_column]
+            elif type(x_columns) is list:
                 inputs = tuple(c for i, c in enumerate(cells) if i in x_columns), cells[y_column]
             else:
                 if y_column != -1:

From 5d9bb8bb7ea033ad2672e79e82ccf8813ad754ac Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Mon, 21 Dec 2020 17:51:31 +0800
Subject: [PATCH 13/14] fix prediction for multi-label classification

---
 hanlp/transform/table.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hanlp/transform/table.py b/hanlp/transform/table.py
index f86ef5316..36350fbd2 100644
--- a/hanlp/transform/table.py
+++ b/hanlp/transform/table.py
@@ -62,7 +62,7 @@ def inputs_to_samples(self, inputs, gold=False):
             if gold:
                 yield cells
             else:
-                yield cells, pad
+                yield cells, [pad] if self.config.multi_label else pad
 
     def y_to_idx(self, y) -> tf.Tensor:
         return self.label_vocab.lookup(y)

From d2f1dc6a2dea1b50a3fd7a49ceadff1437f372f1 Mon Sep 17 00:00:00 2001
From: Derek Zen <leizhang0121@gmail.com>
Date: Wed, 23 Dec 2020 03:35:40 +0800
Subject: [PATCH 14/14] fix evaluation output

---
 hanlp/common/component.py                     | 14 ++++++----
 .../classifiers/transformer_classifier.py     | 26 +++++++++----------
 hanlp/transform/table.py                      |  5 +---
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/hanlp/common/component.py b/hanlp/common/component.py
index df951f7b0..b99b0b6f8 100644
--- a/hanlp/common/component.py
+++ b/hanlp/common/component.py
@@ -92,7 +92,7 @@ def evaluate(self, input_path: str, save_dir=None, output=False, batch_size=128,
         if save_dir and not logger:
             logger = init_logger(name=name, root_dir=save_dir, level=logging.INFO if verbose else logging.WARN,
                                  mode='w')
-        tst_data = self.transform.file_to_dataset(input_path, batch_size=batch_size)
+        tst_data = self.transform.file_to_dataset(input_path, batch_size=batch_size) 
         samples = size_of_dataset(tst_data)
         num_batches = math.ceil(samples / batch_size)
         if warm_up:
@@ -126,7 +126,7 @@ def evaluate(self, input_path: str, save_dir=None, output=False, batch_size=128,
                                 format_scores(score) if isinstance(score, dict) else format_metrics(self.model.metrics),
                                 speed, extra_report))
         if output:
-            self.evaluate_output(tst_data, output, num_batches, self.model.metrics)
+            self.evaluate_output(tst_data, input=input_path, output=output, num_batches=num_batches, metrics=self.model.metrics)
 
         return loss, score, speed
 
@@ -134,7 +134,7 @@ def evaluate_dataset(self, tst_data, callbacks, output, num_batches):
         loss, score = self.model.evaluate(tst_data, callbacks=callbacks, steps=num_batches)
         return loss, score, output
 
-    def evaluate_output(self, tst_data, out, num_batches, metrics: List[tf.keras.metrics.Metric]):
+    def evaluate_output(self, tst_data, input, output, num_batches, metrics: List[tf.keras.metrics.Metric]):
         # out.write('x\ty_true\ty_pred\n')
         for metric in metrics:
             metric.reset_states()
@@ -231,14 +231,15 @@ def load_transform(self, save_dir) -> Transform:
         return self.transform
 
     def save(self, save_dir: str, **kwargs):
+        self.save_meta(save_dir)
         self.save_config(save_dir)
         self.save_vocabs(save_dir)
         self.save_weights(save_dir)
         self.model.save(save_dir)
 
     def load(self, save_dir: str, logger=hanlp.utils.log_util.logger, **kwargs):
-        self.meta['load_path'] = save_dir
         save_dir = get_resource(save_dir)
+        self.meta['load_path'] = save_dir
         self.load_config(save_dir)
         self.load_vocabs(save_dir)
         self.build(**merge_dict(self.config, training=False, logger=logger, **kwargs, overwrite=True, inplace=True))
@@ -408,7 +409,10 @@ def build_callbacks(self, save_dir, logger, **kwargs):
         if isinstance(metrics, str):
             metrics_names = metrics
         else:
-            metrics_names = [m.name for m in metrics][-1]
+            try:
+                metrics_names = metrics[-1].name
+            except:
+                metrics_names = 'accuracy'
         monitor = f'val_{metrics_names}'
         checkpoint = tf.keras.callbacks.ModelCheckpoint(
             os.path.join(save_dir, 'model.h5'),
diff --git a/hanlp/components/classifiers/transformer_classifier.py b/hanlp/components/classifiers/transformer_classifier.py
index 7bb08e1ef..bbcb9457c 100644
--- a/hanlp/components/classifiers/transformer_classifier.py
+++ b/hanlp/components/classifiers/transformer_classifier.py
@@ -93,11 +93,11 @@ def y_to_idx(self, y) -> tf.Tensor:
     def Y_to_outputs(self, Y: Union[tf.Tensor, Tuple[tf.Tensor]], gold=False, inputs=None, X=None, batch=None) -> Iterable:
         # Prediction to be Y > 0:
         if self.config.multi_label:
-            preds = [np.flatnonzero(y>0) for y in Y]
+            preds = [np.flatnonzero(y>0) for y in Y] if not gold else Y
             for p in preds:
                 yield [self.label_vocab.idx_to_token[i] for i in p]
         else:
-            preds = tf.argmax(Y, axis=-1)
+            preds = tf.argmax(Y, axis=-1) if not gold else Y
             for y in preds:
                 yield self.label_vocab.idx_to_token[y]
 
@@ -120,7 +120,7 @@ def fit(self, trn_data: Any, dev_data: Any, save_dir: str, transformer: str, max
             epochs=3, logger=None, verbose=1, **kwargs):
         return super().fit(**merge_locals_kwargs(locals(), kwargs))
 
-    def evaluate_output(self, tst_data, output, num_batches, metrics):
+    def evaluate_output(self, tst_data, input, output, num_batches, metrics):
         metric = metrics[-1]
         try:
             metric_name = metric.name
@@ -128,22 +128,22 @@ def evaluate_output(self, tst_data, output, num_batches, metrics):
             metric_name = 'accuracy'
         logger.info('Saving output to {}'.format(output))
         with open(output, 'w', encoding='utf-8') as out:
-            out.write('sentence\tpred\tgold\n')
             total, correct, score = 0, 0, 0
+            prediction = []
             for idx, batch in enumerate(tst_data):
                 Y_pred = self.model.predict_on_batch(batch[0])
-                for x, y_pred, y_gold, in zip(batch[0][0], Y_pred, batch[1]):
-                    feature = ''.join(self.transform.tokenizer.convert_ids_to_tokens(x.numpy()))#, skip_special_tokens=True))
-                    feature = feature.replace('[CLS]', '')
-                    feature = feature.replace('[PAD]', '')
-                    feature = feature.replace(' ##', '')  # fix sub-word generated by BERT tagger
-                    y_pred_str = self.transform.Y_to_outputs([y_pred])
-                    y_gold_str = self.transform.Y_to_outputs([y_gold])
-                    out.write('{}\t{}\t{}\n'.format(feature, y_pred_str, y_gold_str))
+                Y_pred_str = self.transform.Y_to_outputs(Y_pred)
+                prediction += [y for y in Y_pred_str]
+                for y_pred, y_gold, in zip(Y_pred, batch[1]):
                     total += 1
                     correct += metric(y_gold, y_pred)
                 score = correct/total
-                logger.info(f'{idx + 1}/{num_batches} {metric_name}: {score * 100}')
+                logger.info(f'{idx + 1}/{num_batches} {metric_name}: {score * 100:.2f}%')
+            with open(input, 'r') as f:
+                out.write(f.readline().replace('\n', '')+'\tpred\n')
+                for i, y_pred in enumerate(prediction):
+                    out.write(f.readline().replace('\n', '')+f'\t{y_pred}\n')
+
         return score
 
     def build_loss(self, loss, **kwargs):
diff --git a/hanlp/transform/table.py b/hanlp/transform/table.py
index 36350fbd2..4bf625fb4 100644
--- a/hanlp/transform/table.py
+++ b/hanlp/transform/table.py
@@ -28,10 +28,7 @@ def file_to_inputs(self, filepath: str, gold=True):
         y_column = self.config.y_column
         num_features = self.config.get('num_features', None)
         for cells in read_cells(filepath, skip_header=self.config.skip_header, delimiter=self.config.delimiter):
-            #multi-label: Dataset in .tsv format: x_columns: at most 2 columns being a sentence pair while in most 
-            # cases just one column being the doc content. y_column being the single label, which shall be modified 
-            # to load a list of labels.
-            if type(x_columns) is int:
+            if type(x_columns) is int: 
                 inputs = [cells[x_columns]], cells[y_column]
             elif type(x_columns) is list:
                 inputs = tuple(c for i, c in enumerate(cells) if i in x_columns), cells[y_column]