-
Notifications
You must be signed in to change notification settings - Fork 2.9k
-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Bug]: Got Exception during training 'GPT3-1.3B' - TypeError: object of type 'NoneType' has no len() #8948
Comments
了解,所以現在 看起來以前可以從 paddlenlp/transformers/gpt/configuration.py:118導入, |
目前主要是配置化的方式,上面的方式有可能会导致部分参数缺失;建议按照GPT的示例配置文件来适配 |
@wawltor 感謝你的回覆,然而不幸的是,這看起來並非 root cause,問題仍然存在。我照你的步驟建立了以下設定檔: {
"model_name_or_path": "gpt3-1.3B-en",
"tokenizer_name_or_path": "gpt3-1.3B-en",
"input_dir": "/workspace/dataset",
"output_dir": "output/paddlenlp_gpt3/debug/model_output",
"bf16": true,
"sequence_parallel": true,
"tensor_parallel_degree": 8,
"sharding_parallel_degree": 1,
"sharding": "stage2",
"pipeline_parallel_degree": 1,
"virtual_pp_degree": 1,
"pipeline_parallel_config": "disable_partial_send_recv",
"per_device_train_batch_size": 72,
"per_device_eval_batch_size": 72,
"gradient_accumulation_steps": 32,
"split": "949,50,1",
"max_seq_length": 2048,
"fuse_attention_qkv": true,
"use_flash_attention": true,
"fp16_opt_level": "O2",
"learning_rate": 0.00001,
"min_learning_rate": 0.000005,
"save_steps": 100000,
"weight_decay": 0.01,
"warmup_ratio": 0.01,
"max_grad_norm": 1.0,
"logging_steps": 1,
"dataloader_num_workers": 1,
"hidden_dropout_prob": 0.1,
"attention_probs_dropout_prob": 0.1,
"max_steps": 32,
"eval_steps": 100000,
"report_to": "visualdl",
"disable_tqdm": true,
"do_train": true,
"continue_training": 0,
"device": "gpu"
} 然後運行 Traceback (most recent call last):
File "/home/scratch.ameng_gpu/git/2PaddleNLP_anderson/llm/run_pretrain.py", line 605, in <module>
main()
File "/home/scratch.ameng_gpu/git/2PaddleNLP_anderson/llm/run_pretrain.py", line 511, in main
model = model_class.from_config(config, dtype=dtype)
File "/home/scratch.ameng_gpu/git/2PaddleNLP_anderson/paddlenlp/transformers/auto/modeling.py", line 269, in from_config
model_class = cls._get_model_class_from_config(None, None, config)
File "/home/scratch.ameng_gpu/git/2PaddleNLP_anderson/paddlenlp/transformers/auto/modeling.py", line 218, in _get_model_class_from_config
init_class = architectures.pop() if len(architectures) > 0 else None
TypeError: object of type 'NoneType' has no len() GPT3-1.3B是公開的模型,要不要直接在你那邊復現看看? |
软件环境
重复问题
错误描述
使用 llm/run_pretrain.py 嘗試訓練 "GPT3-1.3B" 時,初始化模型階段會發生錯誤:`architectures` 不知為何為 None。
Log (click me)
[2024-08-16 06:13:15,389] [ INFO] - We are using <class 'paddlenlp.transformers.gpt.tokenizer.GPTTokenizer'> to load 'gpt3-1.3B-en'.
[2024-08-16 06:13:32,179] [ ERROR] - Using bos_token, but it is not set yet.
[2024-08-16 06:13:32,230] [ INFO] - tokenizer config file saved in /tmp/ameng/.paddlenlp/models/gpt3-1.3B-en/tokenizer_config.json
[2024-08-16 06:13:32,230] [ INFO] - Special tokens file saved in /tmp/ameng/.paddlenlp/models/gpt3-1.3B-en/special_tokens_map.json
[2024-08-16 06:13:32,233] [ INFO] - Reset vocab size to 50304 for batter amp peformance.
Final pre-training config: GPTConfig {
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"context_parallel_degree": -1,
"eol_token_id": 198,
"eos_token_id": 50256,
"fused_softmax_with_triangular": false,
"hidden_act": "gelu",
"hidden_activation": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 2048,
"ignore_index": 0,
"initializer_range": 0.02,
"intermediate_size": 8192,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 1024,
"model_type": "gpt",
"normalize_before": true,
"num_attention_heads": 16,
"num_hidden_layers": 24,
"num_partitions": 1,
"pad_token_id": 0,
"paddlenlp_version": "3.0.0b0.post20240816",
"pipeline_parallel_degree": -1,
"scale_qk_coeff": 1.0,
"sep_parallel_degree": -1,
"seq_length": 1024,
"tensor_parallel_degree": -1,
"type_vocab_size": 1,
"use_fast_layer_norm": false,
"vocab_size": 50304
}
Traceback (most recent call last):
File "/workspace/PaddleNLP/llm/run_pretrain.py", line 595, in <module>
main()
File "/workspace/PaddleNLP/llm/run_pretrain.py", line 501, in main
model = model_class.from_config(config, dtype=dtype)
File "/workspace/PaddleNLP/paddlenlp/transformers/auto/modeling.py", line 269, in from_config
model_class = cls._get_model_class_from_config(None, None, config)
File "/workspace/PaddleNLP/paddlenlp/transformers/auto/modeling.py", line 218, in _get_model_class_from_config
init_class = architectures.pop() if len(architectures) > 0 else None
TypeError: object of type 'NoneType' has no len()
稳定复现步骤 & 代码
python -u -m paddle.distributed.launch --gpus "0,1" llm/run_pretrain.py --model_name_or_path gpt3-1.3B-en --output_dir output
The text was updated successfully, but these errors were encountered: