Update more.

This commit is contained in:
Colin 2024-07-31 22:04:01 +08:00
parent 50e502ae96
commit d50cb798b6
4 changed files with 113 additions and 17 deletions

93
unsuper/minist.py Normal file
View File

@ -0,0 +1,93 @@
import torch
import torch.nn as nn
import torch.nn.functional as F # Add this line
import torchvision
import torchvision.transforms as transforms
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training hyper-parameters.
num_epochs = 5
batch_size = 4
learning_rate = 0.001

# Preprocessing: ToTensor converts each image to a tensor, then Normalize
# applies (x - 0.5) / 0.5 on each of the three channels, which maps values
# in [0, 1] onto [-1, 1].
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR10: 60000 32x32 colour images in 10 classes (6000 images per class).
# Both splits are downloaded into ./data when not already present.
train_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    transform=transform,
    download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=transform,
    download=True,
)

# Only the training split is shuffled; the test split keeps its stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
class ConvNet(nn.Module):
    """Small LeNet-style CNN: two conv/pool stages followed by three linear layers.

    With 3x32x32 inputs the second pooling stage yields a 16x5x5 feature map,
    which is flattened to 400 features for the fully connected head.

    Args:
        num_classes: Size of the final linear layer's output. Defaults to 10.
        fc_activation: When True, apply ReLU after ``fc1`` and ``fc2``.
            Defaults to False, which keeps the head purely linear (the
            activations used to be commented out in ``forward``).
    """

    # Per-sample shape expected after the second pooling stage.
    _FEATURE_SHAPE = (16, 5, 5)

    def __init__(self, num_classes: int = 10, fc_activation: bool = False):
        super().__init__()
        # Layer names and creation order are kept stable so that state_dict
        # keys and seeded initialisation are unaffected.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)
        self.fc_activation = fc_activation

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (logits) for ``x``.

        Raises:
            RuntimeError: If the feature map after the second pooling stage
                is not 16x5x5, i.e. the input was not 3x32x32.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # A bare view(-1, 400) would silently re-chunk a wrong-sized batch
        # into a different number of rows whenever the total element count
        # happens to be divisible by 400, so validate the shape first.
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise RuntimeError(
                f"ConvNet expects 3x32x32 inputs; got a feature map of shape "
                f"{tuple(x.shape[-3:])} instead of {self._FEATURE_SHAPE}"
            )
        x = x.view(-1, 16 * 5 * 5)
        x = self.fc1(x)
        if self.fc_activation:
            x = F.relu(x)
        x = self.fc2(x)
        if self.fc_activation:
            x = F.relu(x)
        return self.fc3(x)
# Build the network on the selected device, with cross-entropy loss and
# plain SGD over all of its parameters.
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training: one optimizer step per mini-batch, for num_epochs passes.
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)

        # Forward pass and loss.
        loss = criterion(model(images), labels)

        # Clear stale gradients, backpropagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 2000 steps.
        if step % 2000 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{step}/{n_total_steps}], Loss: {loss.item():.4f}")

print("Finished Training")
# Evaluation: count correct top-1 predictions over the whole test loader.
n_correct = 0
n_samples = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Index of the largest score along dim 1 is the predicted class.
        predicted = model(images).argmax(dim=1)
        n_samples += labels.shape[0]
        n_correct += (predicted == labels).sum().item()

acc = 100.0 * n_correct / n_samples
print(f"Accuracy of the network on the 10000 test images: {acc} %")

View File

@ -10,12 +10,13 @@ meaning数据集是一个模仿自然语言以及抽象表达的数据集。
4. 从0到(vocab_size-1)的编号表示基本meaning是不能被拆解的也就是token 4. 从0到(vocab_size-1)的编号表示基本meaning是不能被拆解的也就是token
5. meaning通过一层层的向低编号的meaning进行组合替换最终形成一个最底层是token的树形数据 5. meaning通过一层层的向低编号的meaning进行组合替换最终形成一个最底层是token的树形数据
6. level表示当前token相对于root meaning的距离 6. level表示当前token相对于root meaning的距离
7. rank_idx表示当前token在不同层的排序编号每4位表示在一层里面的编号低4位表示最低层级的rank_idx高位无用的位用1填充 7. rank
7. rank_all表示当前token在不同层的分子个数每4位表示在一层里面的编号低4位表示最低层级的rank_all高位无用的位用1填充 8. rank_idx表示当前token在不同层的排序编号每4位表示在一层里面的编号低4位表示最低层级的rank_idx高位无用的位用1填充
8. tree用于存储每个meaning的拆解的数据使用字典表达一个树形结构 9. rank_all表示当前token在不同层的分子个数每4位表示在一层里面的编号低4位表示最低层级的rank_all高位无用的位用1填充
9. get_seq_mask返回一个sequence每个token在对应level是不是对应的index,level=0:最底层index=-1:最后一个index=0:第一个 10. tree用于存储每个meaning的拆解的数据使用字典表达一个树形结构
10. meaning_height 当前meaning的总高度 11. get_seq_mask返回一个sequence每个token在对应level是不是对应的index,level=0:最底层index=-1:最后一个index=0:第一个
11. meaning_weight 当前meaning的总宽度 12. meaning_height 当前meaning的总高度
13. meaning_weight 当前meaning的总宽度
``` ```
@ -31,10 +32,11 @@ vocab_size = 256 meaning = 115200
/ \ / \ / \ / \ / \ / \ / \ / \
176 11 255 129 129 99 211 111 176 11 255 129 129 99 211 111
sequence = 123 42 32 176 11 255 129 245 233 129 99 23 211 111 93 176 sequence = 123 42 32 176 11 255 129 245 233 129 99 23 211 111 93 176
level = 3 3 2 4 4 4 4 2 2 4 4 3 4 4 3 3 level = 3 3 2 4 4 4 4 2 2 4 4 3 4 4 3 3
idx at 0 = 0 1 1 0 1 0 1 0 1 0 1 2 0 1 0 1 idx at 0 = 0 1 1 0 1 0 1 0 1 0 1 2 0 1 0 1
idx at 1 = 0 0 0 0 0 1 1 1 1 0 0 0 0 0 2 2 idx at 1 = 0 0 0 0 0 1 1 1 1 0 0 0 0 0 2 2
idx 0 1 1 0 1 16 17 16 17 0 1 2 0 1 32 33 idx 0 1 1 0 1 16 17 16 17 0 1 2 0 1 32 33
``` ```

View File

@ -352,7 +352,6 @@ class MeaningDataset(Dataset):
output["labels"] = data.clone() output["labels"] = data.clone()
output["token_type_ids"] = torch.zeros(data.shape) output["token_type_ids"] = torch.zeros(data.shape)
output["tree"] = [self.tree[i] for i in idx_list] output["tree"] = [self.tree[i] for i in idx_list]
output["level"] = [self.level[i] for i in idx_list]
output["mask"] = self.get_seq_mask_tensor(idx_list) output["mask"] = self.get_seq_mask_tensor(idx_list)
return output return output

View File

@ -24,19 +24,19 @@ dataloader_works = 2
vocab_size = 256 vocab_size = 256
level_ratio = 5 level_ratio = 5
level = 5 level = 5
dataset_level = 1.5 dataset_level = 3
min_subitem = 2 min_subitem = 2
hidden_size = 128 # 128 1024 2048 32 hidden_size = 128 # 128 1024 2048 32
num_attention_heads = 16 # 8 8 16 num_attention_heads = 16 # 8 8 16
num_hidden_layers = 6 # 6 12 24 3 num_hidden_layers = 6 # 6 12 24 3
mask_level = [0, 1] mask_level = [0, 1, 2]
mask_idx = [0, -1] mask_idx = [0, 0, -1]
# name = "vocab_ratio_level_data_hidden_head_layer" # name = "vocab_ratio_level_data_hidden_head_layer"
# name = "mask_level_idx" # name = "mask_level_idx"
name = "hard" name = "bigger"
ver = f"{vocab_size}" + "_" + f"{level_ratio}" + "_" + f"{level}" + "_" + f"{min_subitem}" + "_" + f"{dataset_level}" ver = f"{vocab_size}" + "_" + f"{level_ratio}" + "_" + f"{level}" + "_" + f"{min_subitem}" + "_" + f"{dataset_level}"
ver = ver + "_" + f"{hidden_size}" + "_" + f"{num_attention_heads}" + "_" + f"{num_hidden_layers}" ver = ver + "_" + f"{hidden_size}" + "_" + f"{num_attention_heads}" + "_" + f"{num_hidden_layers}"
@ -56,7 +56,9 @@ if __name__ == "__main__":
start = vocab_size * (level_ratio**level) start = vocab_size * (level_ratio**level)
size = vocab_size * int((level_ratio**dataset_level)) size = vocab_size * int((level_ratio**dataset_level))
raw_dataset = MeaningDataset(start, start + size, size, vocab_size, level_ratio, min_subitem)
raw_dataset = MeaningDataset(start, start + size, vocab_size, None, level_ratio, min_subitem)
# print(raw_dataset.token_frequency())
raw_dataset.set_mask(mask_level, mask_idx) raw_dataset.set_mask(mask_level, mask_idx)
train_dataset, val_dataset = raw_dataset.split(0.9) train_dataset, val_dataset = raw_dataset.split(0.9)
train_dataloader = BatchGroupMeaningDataloader(train_dataset, train_batch_size).dataloader(dataloader_works) train_dataloader = BatchGroupMeaningDataloader(train_dataset, train_batch_size).dataloader(dataloader_works)