diff --git a/wit/meaning/meaning_dataset.py b/wit/meaning/meaning_dataset.py index 0baa894..7dc0cd8 100644 --- a/wit/meaning/meaning_dataset.py +++ b/wit/meaning/meaning_dataset.py @@ -73,15 +73,13 @@ class MeaningMap: ): print("Mapping Load from disk cache: " + file) slhwm = np.load(file_prop) - self.ms_map = slhwm[:, 4:] + self.ms_map = slhwm[:, 2:] self.ms_data = np.load(file_data) self.ms_start = slhwm[:, 0] self.ms_len = slhwm[:, 1] self.ms_level = np.load(file_level) self.ms_rank_idx = np.load(file_rank_idx) self.ms_rank_all = np.load(file_rank_all) - self.ms_height = slhwm[:, 2] - self.ms_weight = slhwm[:, 3] print("Mapping Load end, elapsed:" + str(time.time() - start_time) + "s") else: print("Mapping Disk cache miss, build new one. size:" + str(size)) @@ -112,8 +110,6 @@ class MeaningMap: ms_start = np.zeros((size), dtype=np.int32) # meaning sequence start ms_end = np.zeros((size), dtype=np.int32) # meaning sequence end ms_len = np.zeros((size), dtype=np.int32) # meaning sequence len - ms_height = np.zeros((size), dtype=np.int32) # meaning tree height - ms_weight = np.zeros((size), dtype=np.int32) # meaning tree weight ms_data = np.zeros((datastep), dtype=np.int32) # meaning sequence ms_level = np.zeros((datastep), dtype=np.uint32) # meaning level, vocab's level is 0 ms_rank_idx = np.zeros((datastep), dtype=np.uint32) # meaning index of all level @@ -134,8 +130,6 @@ class MeaningMap: ms_start[i] = index ms_end[i] = index + stride ms_len[i] = stride - ms_height[i] = 0 - ms_weight[i] = 1 index = index + stride for i in range(self.normal_vocab, size): @@ -185,8 +179,6 @@ class MeaningMap: ms_start[i] = index ms_end[i] = end ms_len[i] = len_ma - ms_height[i] = max(ms_height[m_list]) + 1 - ms_weight[i] = sum(ms_weight[m_list]) index = index + len_ma if i % 10000 == 0: print(i) @@ -199,15 +191,11 @@ class MeaningMap: np.save(file_rank_all, ms_rank_all) ms_start = np.array(ms_start).astype(np.int32) - ms_height = np.array(ms_height).astype(np.int32) - ms_weight = np.array(ms_weight).astype(np.int32) ms_len = np.array(ms_len).astype(np.int32) slhwm = np.concatenate( ( ms_start.reshape((-1, 1)), ms_len.reshape((-1, 1)), - ms_height.reshape((-1, 1)), - ms_weight.reshape((-1, 1)), map, ), axis=1, @@ -222,8 +210,6 @@ class MeaningMap: self.ms_map = map # ms_map[i] = [sub(i),sub(i),sub(i),sub(i)...sub(i)] self.ms_start = ms_start self.ms_len = ms_len - self.ms_height = ms_height - self.ms_weight = ms_weight print("Mapping Disk cache build end, elapsed:" + str(time.time() - start_time) + "s") def get_sequence(self, meaning): # return sequence[meaning] @@ -352,8 +338,6 @@ class MeaningDataset(Dataset): self.rank_all = [] self.seq_meaning = [] map = self.get_meaning_map() - self.m_height = map.ms_height - self.m_weight = map.ms_weight if size: meanings = np.random.randint(start, end, size=(size)) else: