Use local tokenizer.
This commit is contained in:
parent
ac61c4d925
commit
122cbd9ff8
|
@ -0,0 +1 @@
|
||||||
|
{"model_max_length": 1024}
|
|
@ -182,7 +182,7 @@ if __name__ == "__main__":
|
||||||
lit_module = LitModule(args.model_name, "./custom_models/gpt2", args.learning_rate, args.use_tril_attention_mask)
|
lit_module = LitModule(args.model_name, "./custom_models/gpt2", args.learning_rate, args.use_tril_attention_mask)
|
||||||
|
|
||||||
# datasets
|
# datasets
|
||||||
tokenizer = load_tokenizer(args.tokenizer_name_or_path)
|
tokenizer = load_tokenizer("./custom_models/gpt2")
|
||||||
train_dataset_list = []
|
train_dataset_list = []
|
||||||
val_dataset_list = []
|
val_dataset_list = []
|
||||||
for dataset_name in args.dataset_name:
|
for dataset_name in args.dataset_name:
|
||||||
|
|
Loading…
Reference in New Issue