Remove weight and height from the meaning dataset.
This commit is contained in:
parent
b2fe00c157
commit
ee30eb4aab
|
@ -73,15 +73,13 @@ class MeaningMap:
|
||||||
):
|
):
|
||||||
print("Mapping Load from disk cache: " + file)
|
print("Mapping Load from disk cache: " + file)
|
||||||
slhwm = np.load(file_prop)
|
slhwm = np.load(file_prop)
|
||||||
self.ms_map = slhwm[:, 4:]
|
self.ms_map = slhwm[:, 2:]
|
||||||
self.ms_data = np.load(file_data)
|
self.ms_data = np.load(file_data)
|
||||||
self.ms_start = slhwm[:, 0]
|
self.ms_start = slhwm[:, 0]
|
||||||
self.ms_len = slhwm[:, 1]
|
self.ms_len = slhwm[:, 1]
|
||||||
self.ms_level = np.load(file_level)
|
self.ms_level = np.load(file_level)
|
||||||
self.ms_rank_idx = np.load(file_rank_idx)
|
self.ms_rank_idx = np.load(file_rank_idx)
|
||||||
self.ms_rank_all = np.load(file_rank_all)
|
self.ms_rank_all = np.load(file_rank_all)
|
||||||
self.ms_height = slhwm[:, 2]
|
|
||||||
self.ms_weight = slhwm[:, 3]
|
|
||||||
print("Mapping Load end, elapsed:" + str(time.time() - start_time) + "s")
|
print("Mapping Load end, elapsed:" + str(time.time() - start_time) + "s")
|
||||||
else:
|
else:
|
||||||
print("Mapping Disk cache miss, build new one. size:" + str(size))
|
print("Mapping Disk cache miss, build new one. size:" + str(size))
|
||||||
|
@ -112,8 +110,6 @@ class MeaningMap:
|
||||||
ms_start = np.zeros((size), dtype=np.int32) # meaning sequence start
|
ms_start = np.zeros((size), dtype=np.int32) # meaning sequence start
|
||||||
ms_end = np.zeros((size), dtype=np.int32) # meaning sequence end
|
ms_end = np.zeros((size), dtype=np.int32) # meaning sequence end
|
||||||
ms_len = np.zeros((size), dtype=np.int32) # meaning sequence len
|
ms_len = np.zeros((size), dtype=np.int32) # meaning sequence len
|
||||||
ms_height = np.zeros((size), dtype=np.int32) # meaning tree height
|
|
||||||
ms_weight = np.zeros((size), dtype=np.int32) # meaning tree weight
|
|
||||||
ms_data = np.zeros((datastep), dtype=np.int32) # meaning sequence
|
ms_data = np.zeros((datastep), dtype=np.int32) # meaning sequence
|
||||||
ms_level = np.zeros((datastep), dtype=np.uint32) # meaning level, vocab's level is 0
|
ms_level = np.zeros((datastep), dtype=np.uint32) # meaning level, vocab's level is 0
|
||||||
ms_rank_idx = np.zeros((datastep), dtype=np.uint32) # meaning index of all level
|
ms_rank_idx = np.zeros((datastep), dtype=np.uint32) # meaning index of all level
|
||||||
|
@ -134,8 +130,6 @@ class MeaningMap:
|
||||||
ms_start[i] = index
|
ms_start[i] = index
|
||||||
ms_end[i] = index + stride
|
ms_end[i] = index + stride
|
||||||
ms_len[i] = stride
|
ms_len[i] = stride
|
||||||
ms_height[i] = 0
|
|
||||||
ms_weight[i] = 1
|
|
||||||
index = index + stride
|
index = index + stride
|
||||||
|
|
||||||
for i in range(self.normal_vocab, size):
|
for i in range(self.normal_vocab, size):
|
||||||
|
@ -185,8 +179,6 @@ class MeaningMap:
|
||||||
ms_start[i] = index
|
ms_start[i] = index
|
||||||
ms_end[i] = end
|
ms_end[i] = end
|
||||||
ms_len[i] = len_ma
|
ms_len[i] = len_ma
|
||||||
ms_height[i] = max(ms_height[m_list]) + 1
|
|
||||||
ms_weight[i] = sum(ms_weight[m_list])
|
|
||||||
index = index + len_ma
|
index = index + len_ma
|
||||||
if i % 10000 == 0:
|
if i % 10000 == 0:
|
||||||
print(i)
|
print(i)
|
||||||
|
@ -199,15 +191,11 @@ class MeaningMap:
|
||||||
np.save(file_rank_all, ms_rank_all)
|
np.save(file_rank_all, ms_rank_all)
|
||||||
|
|
||||||
ms_start = np.array(ms_start).astype(np.int32)
|
ms_start = np.array(ms_start).astype(np.int32)
|
||||||
ms_height = np.array(ms_height).astype(np.int32)
|
|
||||||
ms_weight = np.array(ms_weight).astype(np.int32)
|
|
||||||
ms_len = np.array(ms_len).astype(np.int32)
|
ms_len = np.array(ms_len).astype(np.int32)
|
||||||
slhwm = np.concatenate(
|
slhwm = np.concatenate(
|
||||||
(
|
(
|
||||||
ms_start.reshape((-1, 1)),
|
ms_start.reshape((-1, 1)),
|
||||||
ms_len.reshape((-1, 1)),
|
ms_len.reshape((-1, 1)),
|
||||||
ms_height.reshape((-1, 1)),
|
|
||||||
ms_weight.reshape((-1, 1)),
|
|
||||||
map,
|
map,
|
||||||
),
|
),
|
||||||
axis=1,
|
axis=1,
|
||||||
|
@ -222,8 +210,6 @@ class MeaningMap:
|
||||||
self.ms_map = map # ms_map[i] = [sub(i),sub(i),sub(i),sub(i)...sub(i)]
|
self.ms_map = map # ms_map[i] = [sub(i),sub(i),sub(i),sub(i)...sub(i)]
|
||||||
self.ms_start = ms_start
|
self.ms_start = ms_start
|
||||||
self.ms_len = ms_len
|
self.ms_len = ms_len
|
||||||
self.ms_height = ms_height
|
|
||||||
self.ms_weight = ms_weight
|
|
||||||
print("Mapping Disk cache build end, elapsed:" + str(time.time() - start_time) + "s")
|
print("Mapping Disk cache build end, elapsed:" + str(time.time() - start_time) + "s")
|
||||||
|
|
||||||
def get_sequence(self, meaning): # return sequence[meaning]
|
def get_sequence(self, meaning): # return sequence[meaning]
|
||||||
|
@ -352,8 +338,6 @@ class MeaningDataset(Dataset):
|
||||||
self.rank_all = []
|
self.rank_all = []
|
||||||
self.seq_meaning = []
|
self.seq_meaning = []
|
||||||
map = self.get_meaning_map()
|
map = self.get_meaning_map()
|
||||||
self.m_height = map.ms_height
|
|
||||||
self.m_weight = map.ms_weight
|
|
||||||
if size:
|
if size:
|
||||||
meanings = np.random.randint(start, end, size=(size))
|
meanings = np.random.randint(start, end, size=(size))
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue