parent
f8480678d8
commit
c92db47135
|
@ -47,7 +47,6 @@ def InitDataset(config):
|
||||||
print(f"INFO: Load dataset end")
|
print(f"INFO: Load dataset end")
|
||||||
else:
|
else:
|
||||||
raw_dataset = MeaningDataset(start, start + size, vocab, None, conf.level_ratio, conf.min_subitem)
|
raw_dataset = MeaningDataset(start, start + size, vocab, None, conf.level_ratio, conf.min_subitem)
|
||||||
print("INFO: raw_dataset.token_frequency" + raw_dataset.token_frequency())
|
|
||||||
raw_dataset.set_mask(conf.mask_level, conf.mask_idx)
|
raw_dataset.set_mask(conf.mask_level, conf.mask_idx)
|
||||||
train_dataset, val_dataset = raw_dataset.split(0.9)
|
train_dataset, val_dataset = raw_dataset.split(0.9)
|
||||||
torch.save(train_dataset, trainfile)
|
torch.save(train_dataset, trainfile)
|
||||||
|
|
Loading…
Reference in New Issue