Update more.
parent 50e502ae96
commit d50cb798b6

@@ -0,0 +1,93 @@
import torch
import torch.nn as nn
import torch.nn.functional as F  # Add this line
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters
num_epochs = 5
batch_size = 4
learning_rate = 0.001

# Dataset has PILImage images of range [0, 1].
# We transform them to Tensors of normalized range [-1, 1]
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class
train_dataset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
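        # A 32x32 input shrinks 32 -> 28 (conv1, 5x5) -> 14 (pool) -> 10 (conv2, 5x5) -> 5 (pool),
        # leaving 16 feature maps of size 5x5, hence the 16 * 5 * 5 input features of fc1 below.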
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        # x = F.relu(self.fc1(x))
        x = self.fc1(x)
        # x = F.relu(self.fc2(x))
        x = self.fc2(x)
        x = self.fc3(x)
        return x


model = ConvNet().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 2000 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}")

print("Finished Training")

# Test the model
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # max returns (value, index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f"Accuracy of the network on the 10000 test images: {acc} %")
@@ -10,12 +10,13 @@ The meaning dataset is a dataset that imitates natural language as well as abstract expression.
 4. Numbers from 0 to (vocab_size-1) denote basic meanings that cannot be decomposed any further, i.e. tokens
 5. A meaning is expanded layer by layer by substituting it with combinations of lower-numbered meanings, finally forming a tree whose bottom layer consists of tokens
 6. level is the distance of the current token from the root meaning
-7. rank_idx is the ordering index of the current token at each layer; every 4 bits hold the index within one layer, the lowest 4 bits hold the lowest layer's rank_idx, and unused high bits are filled with 1
-7. rank_all is the number of sub-items of the current token at each layer; every 4 bits hold the count within one layer, the lowest 4 bits hold the lowest layer's rank_all, and unused high bits are filled with 1
-8. tree stores the decomposition data of every meaning, using a dictionary to express a tree structure
-9. get_seq_mask returns, for every token of a sequence, whether it is at the given index of the given level; level=0: the lowest layer, index=-1: the last one, index=0: the first one
-10. meaning_height is the total height of the current meaning
-11. meaning_weight is the total width of the current meaning
+7. rank
+8. rank_idx is the ordering index of the current token at each layer; every 4 bits hold the index within one layer, the lowest 4 bits hold the lowest layer's rank_idx, and unused high bits are filled with 1
+9. rank_all is the number of sub-items of the current token at each layer; every 4 bits hold the count within one layer, the lowest 4 bits hold the lowest layer's rank_all, and unused high bits are filled with 1
+10. tree stores the decomposition data of every meaning, using a dictionary to express a tree structure
+11. get_seq_mask returns, for every token of a sequence, whether it is at the given index of the given level; level=0: the lowest layer, index=-1: the last one, index=0: the first one
+12. meaning_height is the total height of the current meaning
+13. meaning_weight is the total width of the current meaning
 
 
 ```
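As a reading aid for the rank_idx / rank_all / get_seq_mask items above, here is a minimal sketch of the described 4-bits-per-level bookkeeping; the function names and signatures are my own assumptions for illustration, not the repository's actual API:

```python
def level_field(packed, level):
    # Each level occupies 4 bits; level 0 sits in the lowest 4 bits.
    return (packed >> (4 * level)) & 0xF


def seq_mask(rank_idx_seq, rank_all_seq, level, index):
    # True where a token sits at position `index` within its layer at `level`
    # (index=0: first, index=-1: last, resolved via the sibling count in rank_all).
    mask = []
    for ri, ra in zip(rank_idx_seq, rank_all_seq):
        pos, count = level_field(ri, level), level_field(ra, level)
        mask.append(pos == count - 1 if index == -1 else pos == index)
    return mask
```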
@@ -31,10 +32,11 @@ vocab_size = 256 meaning = 115200
   / \      / \      / \      / \
 176  11  255 129  129  99  211 111
 
 sequence =  123  42  32 176  11 255 129 245 233 129  99  23 211 111  93 176
 level    =    3   3   2   4   4   4   4   2   2   4   4   3   4   4   3   3
 idx at 0 =    0   1   1   0   1   0   1   0   1   0   1   2   0   1   0   1
 idx at 1 =    0   0   0   0   0   1   1   1   1   0   0   0   0   0   2   2
 idx      =    0   1   1   0   1  16  17  16  17   0   1   2   0   1  32  33
 
 
 ```
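To make the table above concrete, here is a small illustrative check of my own (not code from the repository): with 4 bits per level, the combined idx row is simply the level-0 index plus 16 times the level-1 index for the two levels shown.

```python
idx_at_0 = [0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1]
idx_at_1 = [0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2]
combined = [a + (b << 4) for a, b in zip(idx_at_0, idx_at_1)]
print(combined)  # [0, 1, 1, 0, 1, 16, 17, 16, 17, 0, 1, 2, 0, 1, 32, 33]
```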
@@ -352,7 +352,6 @@ class MeaningDataset(Dataset):
         output["labels"] = data.clone()
         output["token_type_ids"] = torch.zeros(data.shape)
         output["tree"] = [self.tree[i] for i in idx_list]
-        output["level"] = [self.level[i] for i in idx_list]
         output["mask"] = self.get_seq_mask_tensor(idx_list)
         return output

wit/train.py (12 changed lines)
@@ -24,19 +24,19 @@ dataloader_works = 2
 vocab_size = 256
 level_ratio = 5
 level = 5
-dataset_level = 1.5
+dataset_level = 3
 min_subitem = 2
 
 hidden_size = 128 # 128 1024 2048 32
 num_attention_heads = 16 # 8 8 16
 num_hidden_layers = 6 # 6 12 24 3
 
-mask_level = [0, 1]
-mask_idx = [0, -1]
+mask_level = [0, 1, 2]
+mask_idx = [0, 0, -1]
 
 # name = "vocab_ratio_level_data_hidden_head_layer"
 # name = "mask_level_idx"
-name = "hard"
+name = "bigger"
 
 ver = f"{vocab_size}" + "_" + f"{level_ratio}" + "_" + f"{level}" + "_" + f"{min_subitem}" + "_" + f"{dataset_level}"
 ver = ver + "_" + f"{hidden_size}" + "_" + f"{num_attention_heads}" + "_" + f"{num_hidden_layers}"
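Two reading notes on the hunk above (my own interpretation, not stated in the diff): pairing mask_level with mask_idx element-wise, the updated config asks for the first token at levels 0 and 1 and the last token at level 2; and with the new dataset_level the ver string works out as below.

```python
vocab_size, level_ratio, level, min_subitem, dataset_level = 256, 5, 5, 2, 3
hidden_size, num_attention_heads, num_hidden_layers = 128, 16, 6

ver = f"{vocab_size}" + "_" + f"{level_ratio}" + "_" + f"{level}" + "_" + f"{min_subitem}" + "_" + f"{dataset_level}"
ver = ver + "_" + f"{hidden_size}" + "_" + f"{num_attention_heads}" + "_" + f"{num_hidden_layers}"
print(ver)  # 256_5_5_2_3_128_16_6
```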
@@ -56,7 +56,9 @@ if __name__ == "__main__":
 
     start = vocab_size * (level_ratio**level)
     size = vocab_size * int((level_ratio**dataset_level))
-    raw_dataset = MeaningDataset(start, start + size, size, vocab_size, level_ratio, min_subitem)
+    raw_dataset = MeaningDataset(start, start + size, vocab_size, None, level_ratio, min_subitem)
+    # print(raw_dataset.token_frequency())
     raw_dataset.set_mask(mask_level, mask_idx)
     train_dataset, val_dataset = raw_dataset.split(0.9)
     train_dataloader = BatchGroupMeaningDataloader(train_dataset, train_batch_size).dataloader(dataloader_works)
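For reference, with the hyper-parameters set earlier in this file (vocab_size = 256, level_ratio = 5, level = 5, dataset_level = 3), the range passed to the new MeaningDataset call works out as follows; this is a quick arithmetic check of my own, not code from the repository.

```python
vocab_size, level_ratio, level, dataset_level = 256, 5, 5, 3
start = vocab_size * (level_ratio**level)            # 256 * 3125 = 800000
size = vocab_size * int(level_ratio**dataset_level)  # 256 * 125  = 32000
print(start, start + size)                           # 800000 832000
```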