Use local tokenizer.
parent ac61c4d925
commit 122cbd9ff8
@@ -0,0 +1 @@
+{"model_max_length": 1024}
@@ -182,7 +182,7 @@ if __name__ == "__main__":
     lit_module = LitModule(args.model_name, "./custom_models/gpt2", args.learning_rate, args.use_tril_attention_mask)
 
     # datasets
-    tokenizer = load_tokenizer(args.tokenizer_name_or_path)
+    tokenizer = load_tokenizer("./custom_models/gpt2")
     train_dataset_list = []
     val_dataset_list = []
     for dataset_name in args.dataset_name:
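
For context, a hedged sketch of what a load_tokenizer helper along the lines called above might do; its real body is not part of this diff, so the pad-token handling here is an illustrative assumption:

from transformers import AutoTokenizer, PreTrainedTokenizerBase

def load_tokenizer(name_or_path: str) -> PreTrainedTokenizerBase:
    # Accepts either a hub name ("gpt2") or a local directory, which is what
    # lets this commit swap args.tokenizer_name_or_path for "./custom_models/gpt2".
    tokenizer = AutoTokenizer.from_pretrained(name_or_path)
    # GPT-2 ships without a pad token; reusing EOS makes batched padding work.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer

Loading from the checked-in directory pins the tokenizer to the committed config (model_max_length of 1024, matching GPT-2's context size) instead of whatever the hub serves for a name passed on the command line.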