Skip to content

Commit

Permalink
feat: support parallel reward function
Browse files Browse the repository at this point in the history
  • Loading branch information
Jingru committed Nov 15, 2023
1 parent e085ba2 commit d283ee2
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion trlx/trainer/accelerate_ppo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,10 @@ def make_experience(self, num_rollouts: int = 1024, iter_count: int = 0): # noq
scores = all_scores
scores_mask = scores != -np.inf

- str_samples, str_prompts, str_outputs = self.decode(prompt_tensors, samples, append_eos_token=True)
+ if self.config.train.reward_only_in_main_process:
+     str_samples, str_prompts, str_outputs = self.decode(prompt_tensors, samples, append_eos_token=True)
+ else:
+     str_samples, str_prompts, str_outputs = all_str_samples, all_str_prompts, all_str_outputs

# Pad the sample outputs
outputs = self.tokenizer(str_outputs).input_ids
Expand Down

0 comments on commit d283ee2

Please sign in to comment.