Fix dataset cache check: include vocab size and seed in the cache file names.

This commit is contained in:
Colin 2025-08-12 11:08:16 +08:00
parent 2a09b9d9b1
commit ce81f26845
2 changed files with 5 additions and 7 deletions

View File

@ -37,9 +37,8 @@ def InitDataset(config):
seed = c.seed
path = "./data/"
conf_name = (
f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_stride{c.stride}_tree{c.with_tree}.pt"
)
conf_name = f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_seed{seed}"
conf_name = conf_name + f"_vocab{vocab}_stride{c.stride}_tree{c.with_tree}.pt"
trainfile = path + f"MeaningDataset_train" + conf_name
valfile = path + f"MeaningDataset_val" + conf_name
if not os.path.exists(path):
@ -101,9 +100,8 @@ def InitValDataset(config):
seed = c.seed
path = "./data/"
conf_name = (
f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_stride{c.stride}_tree{c.with_tree}.pt"
)
conf_name = f"_s{start}_e{end}_s{size}_ms{c.min_subitem}_maxs{c.max_subitem}_seed{seed}"
conf_name = conf_name + f"_vocab{vocab}_stride{c.stride}_tree{c.with_tree}.pt"
valfile = path + f"MeaningDataset_val" + conf_name
if not os.path.exists(path):
os.mkdir(path)

View File

@ -50,7 +50,7 @@ class MeaningMap:
path = "./data/"
file = "structured_language_" + str(size) + "_" + str(vocab_size)
file += "_" + str(max_subitem) + "_" + str(min_subitem)
file += "_" + str(stride) + "_" + str(with_tree)
file += "_" + str(stride) + "_" + str(with_tree) + "_" + str(seed)
file_prop = path + file + "_prop.npy"
file_data = path + file + "_data.npy"
file_level = path + file + "_level.npy"