Add layer output/weight/gradient image dumps to minist.py.

2024-08-18 17:42:00 +08:00 · 2024-08-18 17:42:00 +08:00 · f2ee49a639
parent 950055c210
commit f2ee49a639
11 changed files with 50 additions and 15 deletions
--- a/unsuper/conv1_output.png
+++ b/unsuper/conv1_output.png
--- a/unsuper/conv1_weight.png
+++ b/unsuper/conv1_weight.png
--- a/unsuper/conv1_weight_grad.png
+++ b/unsuper/conv1_weight_grad.png
--- a/unsuper/conv2_output.png
+++ b/unsuper/conv2_output.png
--- a/unsuper/conv2_weight.png
+++ b/unsuper/conv2_weight.png
--- a/unsuper/conv2_weight_grad.png
+++ b/unsuper/conv2_weight_grad.png
--- a/unsuper/fc_output.png
+++ b/unsuper/fc_output.png
--- a/unsuper/fc_weight.png
+++ b/unsuper/fc_weight.png
--- a/unsuper/fc_weight_grad.png
+++ b/unsuper/fc_weight_grad.png
--- a/unsuper/input_image.png
+++ b/unsuper/input_image.png
--- a/unsuper/minist.py
+++ b/unsuper/minist.py
@ -1,49 +1,48 @@
 import os
 import sys
 import torch
 import torch.nn as nn
 import torch.nn.functional as F  # Add this line
 import torchvision
 import torchvision.transforms as transforms
 sys.path.append("..")
 from tools import show
 seed = 4321
 torch.manual_seed(seed)
 torch.cuda.manual_seed_all(seed)
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Hyper-parameters
-num_epochs = 5
+num_epochs = 1
-batch_size = 4
+batch_size = 1
 learning_rate = 0.001
 # The dataset yields PIL images; ToTensor() converts them to float tensors in [0, 1].
 # No Normalize transform is applied, so values are NOT rescaled to [-1, 1].
 transform = transforms.Compose([transforms.ToTensor()])
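 # CIFAR10 (disabled alternative, see the commented-out lines): 60000 32x32 color images in 10 classes, with 6000 images per class.
 # MNIST (the dataset actually loaded below): 28x28 grayscale digit images in 10 classes.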
 # train_dataset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
 train_dataset = torchvision.datasets.MNIST(root="./data", train=True, download=True, transform=transform)
 # test_dataset = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)
 test_dataset = torchvision.datasets.MNIST(root="./data", train=False, download=True, transform=transform)
 train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
 test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
 class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
-        self.conv1 = nn.Conv2d(1, 6, 3, 1, 1)
+        self.conv1 = nn.Conv2d(1, 8, 3, 1, 1)
        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.conv2 = nn.Conv2d(8, 8, 5)
-        self.fc1 = nn.Linear(16 * 5 * 5, 10)
+        self.fc1 = nn.Linear(8 * 5 * 5, 10)
        # self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 10)
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
-        x = x.view(-1, 16 * 5 * 5)
+        x = x.view(-1, 8 * 5 * 5)
        # x = F.relu(self.fc1(x))
        # x = F.relu(self.fc2(x))
        # x = self.fc3(x)
@ -51,6 +50,38 @@ class ConvNet(nn.Module):
        x = self.fc1(x)
        return x
    def printFector(self, x, label):
        """Dump one forward/backward pass of the network as images.

        Runs ``x`` through conv1 -> ReLU/pool -> conv2 -> ReLU/pool -> fc1,
        computes the cross-entropy loss against ``label``, back-propagates
        it, and hands the input, each layer's weights, each layer's raw
        (pre-ReLU) output and each layer's weight gradient to
        ``show.DumpTensorToImage`` under fixed file names.

        Args:
            x: 4-D input batch for ``conv1``. It is moved to the model's
                device here, so a batch taken straight from a DataLoader
                works even when the model lives on the GPU.
            label: Class-index targets for the cross-entropy loss; also
                moved to the model's device.

        Side effects:
            Clears and then repopulates ``.grad`` on the module parameters,
            and writes ``*.png`` dumps via ``show.DumpTensorToImage``
            (where they land is decided by that helper).
        """
        # The caller passes CPU tensors from the loader while the model may
        # have been moved with ``.to(device)``; align them to avoid a
        # device-mismatch error in the first convolution.
        param_device = self.conv1.weight.device
        x = x.to(param_device)
        label = label.to(param_device)

        # Collapse (batch, channel) so every 2-D plane becomes one image.
        show.DumpTensorToImage(x.view(-1, x.shape[2], x.shape[3]), "input_image.png")

        x = self.conv1(x)
        w = self.conv1.weight
        show.DumpTensorToImage(w.view(-1, w.shape[2], w.shape[3]), "conv1_weight.png")
        show.DumpTensorToImage(x.view(-1, x.shape[2], x.shape[3]), "conv1_output.png")

        x = self.pool(F.relu(x))
        x = self.conv2(x)
        w = self.conv2.weight
        show.DumpTensorToImage(w.view(-1, w.shape[2], w.shape[3]), "conv2_weight.png")
        show.DumpTensorToImage(x.view(-1, x.shape[2], x.shape[3]), "conv2_output.png")

        x = self.pool(F.relu(x))
        # Flatten to whatever width fc1 expects instead of repeating the
        # literal 8 * 5 * 5 used when fc1 was constructed.
        x = x.view(-1, self.fc1.in_features)
        x = self.fc1(x)
        # The fc weight matrix is regrouped into 10x10 tiles and permuted
        # purely to give the dump helper a stack of 2-D planes.
        show.DumpTensorToImage(self.fc1.weight.view(-1, 10, 10).permute(2, 0, 1), "fc_weight.png")
        show.DumpTensorToImage(x.view(-1), "fc_output.png")

        loss = F.cross_entropy(x, label)
        # Clear stale gradients on this module directly rather than through
        # a module-level ``optimizer`` global, so the dumped gradients come
        # from this single backward pass only.
        self.zero_grad()
        loss.backward()

        w = self.conv1.weight.grad
        show.DumpTensorToImage(w.view(-1, w.shape[2], w.shape[3]), "conv1_weight_grad.png")
        w = self.conv2.weight.grad
        show.DumpTensorToImage(w.view(-1, w.shape[2], w.shape[3]), "conv2_weight_grad.png")
        show.DumpTensorToImage(self.fc1.weight.grad.view(-1, 10, 10).permute(2, 0, 1), "fc_weight_grad.png")
 model = ConvNet().to(device)
@ -77,6 +108,10 @@ for epoch in range(num_epochs):
        if (i + 1) % 2000 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}")
 # Dump the network's intermediate tensors for the first test batch only;
 # the `break` ends the loop after that single batch.
 for images, labels in test_loader:
    model.printFector(images, labels)
    break
 print("Finished Training")
 # Test the model