Skip to content

Commit

Permalink
fix(base_trainer): force pad_token regardless of architecture
Browse files Browse the repository at this point in the history
  • Loading branch information
maxreciprocate committed Jun 23, 2023
1 parent f59086b commit 087e73d
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions trlx/trainer/accelerate_base_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ def __init__(self, config, **kwargs): # noqa: C901
         self.tokenizer.padding_side = config.tokenizer.padding_side
         self.tokenizer.truncation_side = config.tokenizer.truncation_side
         self.tokenizer.sep_token = "<sep>"
-        if config.model.model_arch_type != "seq2seq":
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = "<|padding|>"

         script_name = os.path.basename(sys.argv[0]).rsplit(".", 1)[0]
         if not isinstance(config.model.model_path, str):
Expand Down

0 comments on commit 087e73d

Please sign in to comment.