Update more.
This commit is contained in:
parent
50e502ae96
commit
d50cb798b6
|
@ -0,0 +1,93 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F # Add this line
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
# Device configuration: run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyper-parameters
num_epochs = 5
batch_size = 4
learning_rate = 0.001

# ToTensor() turns each PIL image into a float tensor with values in [0, 1];
# Normalize(mean=0.5, std=0.5) per channel then rescales those values to [-1, 1].
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class.
# Both splits are stored under ./data and downloaded on first use.
train_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    transform=transform,
    download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=transform,
    download=True,
)

# Only the training loader shuffles; the test loader keeps the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
|
||||
|
||||
|
||||
class ConvNet(nn.Module):
    """Small convolutional classifier producing 10 logits per input image.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three stacked linear layers. The first linear layer expects
    ``16 * 5 * 5`` features per sample, which is what 3-channel 32x32
    inputs yield after the convolution/pooling stages
    (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor; the same pooling module is reused after both convolutions.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw class scores (no softmax) for the batch ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flat = features.view(-1, self.fc1.in_features)
        # No activation is applied between the linear layers, so the head is
        # a composition of three affine maps.
        hidden = self.fc1(flat)
        hidden = self.fc2(hidden)
        return self.fc3(hidden)
|
||||
|
||||
|
||||
# Build the network on the selected device, with cross-entropy loss and plain SGD.
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 2000 steps.
        if (i + 1) % 2000 != 0:
            continue
        print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}")

print("Finished Training")
|
||||
|
||||
# Test the model: count how many test samples receive the correct top-1 class.
n_correct = 0
n_samples = 0
with torch.no_grad():  # no gradients are needed while evaluating
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)

        # torch.max along dim 1 returns (values, indices); the indices are the predicted classes.
        predicted = torch.max(outputs.data, 1)[1]
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

# Accuracy as a percentage of all evaluated samples.
acc = 100.0 * n_correct / n_samples
print(f"Accuracy of the network on the 10000 test images: {acc} %")
|
|
@ -10,12 +10,13 @@ meaning数据集是一个模仿自然语言,以及抽象表达的数据集。
|
|||
4. 从0到(vocab_size-1)的编号表示基本meaning,是不能被拆解的,也就是token
|
||||
5. meaning通过一层层的向低编号的meaning进行组合替换,最终形成一个最底层是token的树形数据
|
||||
6. level表示当前token相对于root meaning的距离
|
||||
7. rank_idx表示当前token在不同层的排序编号,每4位表示在一层里面的编号,低4位表示最低层级的rank_idx,高位无用的位用1填充
|
||||
7. rank_all表示当前token在不同层的分子个数,每4位表示在一层里面的编号,低4位表示最低层级的rank_all,高位无用的位用1填充
|
||||
8. tree用于存储每个meaning的拆解的数据,使用字典表达一个树形结构
|
||||
9. get_seq_mask返回一个sequence每个token在对应level是不是对应的index,level=0:最底层,index=-1:最后一个,index=0:第一个
|
||||
10. meaning_height 当前meaning的总高度
|
||||
11. meaning_weight 当前meaning的总宽度
|
||||
7. rank
|
||||
8. rank_idx表示当前token在不同层的排序编号,每4位表示在一层里面的编号,低4位表示最低层级的rank_idx,高位无用的位用1填充
|
||||
9. rank_all表示当前token在不同层的分子个数,每4位表示在一层里面的分子个数,低4位表示最低层级的rank_all,高位无用的位用1填充
|
||||
10. tree用于存储每个meaning的拆解的数据,使用字典表达一个树形结构
|
||||
11. get_seq_mask返回一个sequence每个token在对应level是不是对应的index,level=0:最底层,index=-1:最后一个,index=0:第一个
|
||||
12. meaning_height 当前meaning的总高度
|
||||
13. meaning_weight 当前meaning的总宽度
|
||||
|
||||
|
||||
```
|
||||
|
@ -31,10 +32,11 @@ vocab_size = 256 meaning = 115200
|
|||
/ \ / \ / \ / \
|
||||
176 11 255 129 129 99 211 111
|
||||
|
||||
sequence = 123 42 32 176 11 255 129 245 233 129 99 23 211 111 93 176
|
||||
level = 3 3 2 4 4 4 4 2 2 4 4 3 4 4 3 3
|
||||
idx at 0 = 0 1 1 0 1 0 1 0 1 0 1 2 0 1 0 1
|
||||
idx at 1 = 0 0 0 0 0 1 1 1 1 0 0 0 0 0 2 2
|
||||
idx 0 1 1 0 1 16 17 16 17 0 1 2 0 1 32 33
|
||||
sequence = 123 42 32 176 11 255 129 245 233 129 99 23 211 111 93 176
|
||||
level = 3 3 2 4 4 4 4 2 2 4 4 3 4 4 3 3
|
||||
idx at 0 = 0 1 1 0 1 0 1 0 1 0 1 2 0 1 0 1
|
||||
idx at 1 = 0 0 0 0 0 1 1 1 1 0 0 0 0 0 2 2
|
||||
idx 0 1 1 0 1 16 17 16 17 0 1 2 0 1 32 33
|
||||
|
||||
|
||||
```
|
||||
|
|
|
@ -352,7 +352,6 @@ class MeaningDataset(Dataset):
|
|||
output["labels"] = data.clone()
|
||||
output["token_type_ids"] = torch.zeros(data.shape)
|
||||
output["tree"] = [self.tree[i] for i in idx_list]
|
||||
output["level"] = [self.level[i] for i in idx_list]
|
||||
output["mask"] = self.get_seq_mask_tensor(idx_list)
|
||||
return output
|
||||
|
||||
|
|
12
wit/train.py
12
wit/train.py
|
@ -24,19 +24,19 @@ dataloader_works = 2
|
|||
vocab_size = 256
|
||||
level_ratio = 5
|
||||
level = 5
|
||||
dataset_level = 1.5
|
||||
dataset_level = 3
|
||||
min_subitem = 2
|
||||
|
||||
hidden_size = 128 # 128 1024 2048 32
|
||||
num_attention_heads = 16 # 8 8 16
|
||||
num_hidden_layers = 6 # 6 12 24 3
|
||||
|
||||
mask_level = [0, 1]
|
||||
mask_idx = [0, -1]
|
||||
mask_level = [0, 1, 2]
|
||||
mask_idx = [0, 0, -1]
|
||||
|
||||
# name = "vocab_ratio_level_data_hidden_head_layer"
|
||||
# name = "mask_level_idx"
|
||||
name = "hard"
|
||||
name = "bigger"
|
||||
|
||||
ver = f"{vocab_size}" + "_" + f"{level_ratio}" + "_" + f"{level}" + "_" + f"{min_subitem}" + "_" + f"{dataset_level}"
|
||||
ver = ver + "_" + f"{hidden_size}" + "_" + f"{num_attention_heads}" + "_" + f"{num_hidden_layers}"
|
||||
|
@ -56,7 +56,9 @@ if __name__ == "__main__":
|
|||
|
||||
start = vocab_size * (level_ratio**level)
|
||||
size = vocab_size * int((level_ratio**dataset_level))
|
||||
raw_dataset = MeaningDataset(start, start + size, size, vocab_size, level_ratio, min_subitem)
|
||||
|
||||
raw_dataset = MeaningDataset(start, start + size, vocab_size, None, level_ratio, min_subitem)
|
||||
# print(raw_dataset.token_frequency())
|
||||
raw_dataset.set_mask(mask_level, mask_idx)
|
||||
train_dataset, val_dataset = raw_dataset.split(0.9)
|
||||
train_dataloader = BatchGroupMeaningDataloader(train_dataset, train_batch_size).dataloader(dataloader_works)
|
||||
|
|
Loading…
Reference in New Issue