diff --git a/wit/dataset/dataset.py b/wit/dataset/dataset.py index 9d07a4d..1c9a5ee 100644 --- a/wit/dataset/dataset.py +++ b/wit/dataset/dataset.py @@ -47,7 +47,6 @@ def InitDataset(config): print(f"INFO: Load dataset end") else: raw_dataset = MeaningDataset(start, start + size, vocab, None, conf.level_ratio, conf.min_subitem) - print("INFO: raw_dataset.token_frequency" + raw_dataset.token_frequency()) raw_dataset.set_mask(conf.mask_level, conf.mask_idx) train_dataset, val_dataset = raw_dataset.split(0.9) torch.save(train_dataset, trainfile)