Remove weight and height from the meaning dataset.
This commit is contained in:
		
							parent
							
								
									b2fe00c157
								
							
						
					
					
						commit
						ee30eb4aab
					
				|  | @ -73,15 +73,13 @@ class MeaningMap: | |||
|         ): | ||||
|             print("Mapping Load from disk cache: " + file) | ||||
|             slhwm = np.load(file_prop) | ||||
|             self.ms_map = slhwm[:, 4:] | ||||
|             self.ms_map = slhwm[:, 2:] | ||||
|             self.ms_data = np.load(file_data) | ||||
|             self.ms_start = slhwm[:, 0] | ||||
|             self.ms_len = slhwm[:, 1] | ||||
|             self.ms_level = np.load(file_level) | ||||
|             self.ms_rank_idx = np.load(file_rank_idx) | ||||
|             self.ms_rank_all = np.load(file_rank_all) | ||||
|             self.ms_height = slhwm[:, 2] | ||||
|             self.ms_weight = slhwm[:, 3] | ||||
|             print("Mapping Load end, elapsed:" + str(time.time() - start_time) + "s") | ||||
|         else: | ||||
|             print("Mapping Disk cache miss, build new one. size:" + str(size)) | ||||
|  | @ -112,8 +110,6 @@ class MeaningMap: | |||
|             ms_start = np.zeros((size), dtype=np.int32)  # meaning sequence start | ||||
|             ms_end = np.zeros((size), dtype=np.int32)  # meaning sequence end | ||||
|             ms_len = np.zeros((size), dtype=np.int32)  # meaning sequence len | ||||
|             ms_height = np.zeros((size), dtype=np.int32)  # meaning tree height | ||||
|             ms_weight = np.zeros((size), dtype=np.int32)  # meaning tree weight | ||||
|             ms_data = np.zeros((datastep), dtype=np.int32)  # meaning sequence | ||||
|             ms_level = np.zeros((datastep), dtype=np.uint32)  # meaning level, vocab's level is 0 | ||||
|             ms_rank_idx = np.zeros((datastep), dtype=np.uint32)  # meaning index of all level | ||||
|  | @ -134,8 +130,6 @@ class MeaningMap: | |||
|                 ms_start[i] = index | ||||
|                 ms_end[i] = index + stride | ||||
|                 ms_len[i] = stride | ||||
|                 ms_height[i] = 0 | ||||
|                 ms_weight[i] = 1 | ||||
|                 index = index + stride | ||||
| 
 | ||||
|             for i in range(self.normal_vocab, size): | ||||
|  | @ -185,8 +179,6 @@ class MeaningMap: | |||
|                 ms_start[i] = index | ||||
|                 ms_end[i] = end | ||||
|                 ms_len[i] = len_ma | ||||
|                 ms_height[i] = max(ms_height[m_list]) + 1 | ||||
|                 ms_weight[i] = sum(ms_weight[m_list]) | ||||
|                 index = index + len_ma | ||||
|                 if i % 10000 == 0: | ||||
|                     print(i) | ||||
|  | @ -199,15 +191,11 @@ class MeaningMap: | |||
|             np.save(file_rank_all, ms_rank_all) | ||||
| 
 | ||||
|             ms_start = np.array(ms_start).astype(np.int32) | ||||
|             ms_height = np.array(ms_height).astype(np.int32) | ||||
|             ms_weight = np.array(ms_weight).astype(np.int32) | ||||
|             ms_len = np.array(ms_len).astype(np.int32) | ||||
|             slhwm = np.concatenate( | ||||
|                 ( | ||||
|                     ms_start.reshape((-1, 1)), | ||||
|                     ms_len.reshape((-1, 1)), | ||||
|                     ms_height.reshape((-1, 1)), | ||||
|                     ms_weight.reshape((-1, 1)), | ||||
|                     map, | ||||
|                 ), | ||||
|                 axis=1, | ||||
|  | @ -222,8 +210,6 @@ class MeaningMap: | |||
|             self.ms_map = map  # ms_map[i] = [sub(i),sub(i),sub(i),sub(i)...sub(i)] | ||||
|             self.ms_start = ms_start | ||||
|             self.ms_len = ms_len | ||||
|             self.ms_height = ms_height | ||||
|             self.ms_weight = ms_weight | ||||
|             print("Mapping Disk cache build end, elapsed:" + str(time.time() - start_time) + "s") | ||||
| 
 | ||||
|     def get_sequence(self, meaning):  # return sequence[meaning] | ||||
|  | @ -352,8 +338,6 @@ class MeaningDataset(Dataset): | |||
|         self.rank_all = [] | ||||
|         self.seq_meaning = [] | ||||
|         map = self.get_meaning_map() | ||||
|         self.m_height = map.ms_height | ||||
|         self.m_weight = map.ms_weight | ||||
|         if size: | ||||
|             meanings = np.random.randint(start, end, size=(size)) | ||||
|         else: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Colin
						Colin