From ce81f268450c572798df9ddb14f367c116326d35 Mon Sep 17 00:00:00 2001 From: Colin <> Date: Tue, 12 Aug 2025 11:08:16 +0800 Subject: [PATCH] Fix dataset cache check with vocab size. --- wit/meaning/dataset.py | 10 ++++------ wit/meaning/meaning_dataset.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/wit/meaning/dataset.py b/wit/meaning/dataset.py index b196a81..55ce971 100644 --- a/wit/meaning/dataset.py +++ b/wit/meaning/dataset.py @@ -37,9 +37,8 @@ def InitDataset(config): seed = c.seed path = "./data/" - conf_name = ( - f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_stride{c.stride}_tree{c.with_tree}.pt" - ) + conf_name = f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_seed{seed}" + conf_name = conf_name + f"_vocab{vocab}_stride{c.stride}_tree{c.with_tree}.pt" trainfile = path + f"MeaningDataset_train" + conf_name valfile = path + f"MeaningDataset_val" + conf_name if not os.path.exists(path): @@ -101,9 +100,8 @@ def InitValDataset(config): seed = c.seed path = "./data/" - conf_name = ( - f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_stride{c.stride}_tree{c.with_tree}.pt" - ) + conf_name = f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_seed{seed}" + conf_name = conf_name + f"_vocab{vocab}_stride{c.stride}_tree{c.with_tree}.pt" valfile = path + f"MeaningDataset_val" + conf_name if not os.path.exists(path): os.mkdir(path) diff --git a/wit/meaning/meaning_dataset.py b/wit/meaning/meaning_dataset.py index 8c6f501..2044cdd 100644 --- a/wit/meaning/meaning_dataset.py +++ b/wit/meaning/meaning_dataset.py @@ -50,7 +50,7 @@ class MeaningMap: path = "./data/" file = "structured_language_" + str(size) + "_" + str(vocab_size) file += "_" + str(max_subitem) + "_" + str(min_subitem) - file += "_" + str(stride) + "_" + str(with_tree) + file += "_" + str(stride) + "_" + str(with_tree) + "_" + str(seed) file_prop = path + file + "_prop.npy" file_data = path + file + "_data.npy" file_level = path + file + "_level.npy"