import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.cuda.amp import autocast
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
import cv2
from torch.profiler import profile, record_function, ProfilerActivity

batchsize = 12


class ManualDataset(Dataset):
    """Synthesizes 28x28 images containing one rotated circle (label 0) or square (label 1)."""

    def __init__(self, transform=None):
        # Note: `transform` is accepted but never applied; normalization is
        # done by hand in __getitem__ below.
        self.transform = transform

    def __getitem__(self, index):
        data = np.zeros((28, 28), dtype="float32")
        radiu = int(random.random() * 7 + 3)  # radius in [3, 9]
        # Keep the whole shape inside the 28x28 canvas (valid coords are 0..27)
        basex = random.randint(radiu, 27 - radiu)
        basey = random.randint(radiu, 27 - radiu)
        if random.random() > 0.5:
            cv2.circle(data, (basex, basey), radiu, 255, -1)
            label = 0
        else:
            cv2.rectangle(data, (basex - radiu, basey - radiu),
                          (basex + radiu, basey + radiu), 255, -1)
            label = 1
        # Rotate the shape by a random angle around its center
        angle = random.random() * 360
        M = cv2.getRotationMatrix2D((basex, basey), angle, 1.0)
        data = cv2.warpAffine(data, M, (28, 28))
        # cv2.imwrite("test.jpg", data)
        data = (data - 128) / 256.0  # roughly center and scale pixel values
        img = torch.from_numpy(data)
        img = img.view(1, 28, 28)
        return img, label

    def __len__(self):
        return 10000


train_loader = DataLoader(
    ManualDataset(transform=transforms.Compose([
        # transforms.ColorJitter(0.2, 0.2),
        # transforms.RandomRotation(30),
        # transforms.RandomResizedCrop(28),
        transforms.ToTensor(),
        transforms.Normalize((128,), (256,)),
    ])),
    batch_size=batchsize, shuffle=True, num_workers=2)

test_loader = DataLoader(
    ManualDataset(transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((128,), (256,)),
    ])),
    batch_size=batchsize, shuffle=True, num_workers=2)


class Net(nn.Module):
    """Tiny all-convolutional classifier: 1x28x28 in, 10 class scores out."""

    def __init__(self):
        super(Net, self).__init__()
        layers = []
        # 28x28 -> conv5 -> 24x24 -> pool -> 12x12
        layers += [nn.Conv2d(1, 8, kernel_size=5),
                   nn.MaxPool2d(kernel_size=2, stride=2),
                   nn.Sigmoid()]
        # 12x12 -> conv3 -> 10x10 -> pool -> 5x5
        layers += [nn.Conv2d(8, 8, kernel_size=3),
                   nn.MaxPool2d(kernel_size=2, stride=2),
                   nn.Sigmoid()]
        # 5x5 -> conv5 -> 1x1 with 10 channels, one per class
        layers += [nn.Conv2d(8, 10, kernel_size=5)]
        self.features = nn.Sequential(*layers)

    def forward(self, x):
        x = self.features(x)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=1)


model = Net()
# model = model.cuda()
optimizer = optim.SGD(model.parameters(), lr=0.0005)

for batch_idx, (data, target) in enumerate(train_loader):
    # data = data.cuda()
    # target = target.cuda()
    optimizer.zero_grad()
    # Profile a single forward pass; note that autocast is a no-op while the
    # model stays on the CPU.
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
                 with_stack=True, record_shapes=True) as prof:
        with record_function("model_inference"):
            with autocast():
                output = model(data)
                loss = F.nll_loss(output, target)
    print(prof.key_averages().table(sort_by="cpu_time_total"))
    prof.export_chrome_trace("trace.json")  # open in chrome://tracing
    exit()  # stop after profiling the first batch
    # The backward pass belongs outside the autocast context
    loss.backward()
    optimizer.step()
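
# An alternative to profiling one batch and exiting: torch.profiler's schedule
# API (torch >= 1.8) skips warm-up iterations and records a few steady-state
# steps for TensorBoard. This is a sketch, left as an uncalled function since
# the loop above exits after its first batch; the function name and the log
# directory "./log/profiler" are arbitrary choices, not library conventions.
def profile_with_schedule(model, loader, optimizer, num_steps=6):
    with profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=1),
        on_trace_ready=torch.profiler.tensorboard_trace_handler("./log/profiler"),
        record_shapes=True,
        with_stack=True,
    ) as prof:
        for step, (data, target) in enumerate(loader):
            if step >= num_steps:
                break
            optimizer.zero_grad()
            loss = F.nll_loss(model(data), target)
            loss.backward()
            optimizer.step()
            prof.step()  # tell the profiler a training step has finished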
# Reference pattern for a full AMP training loop:
#
# from torch.cuda.amp import autocast, GradScaler
#
# # Create the model; parameters default to torch.FloatTensor
# model = Net().cuda()
# optimizer = optim.SGD(model.parameters(), ...)
#
# # Instantiate a GradScaler once, before training begins
# scaler = GradScaler()
#
# for epoch in epochs:
#     for input, target in data:
#         optimizer.zero_grad()
#         # Run the forward pass (model + loss) under autocast
#         with autocast():
#             output = model(input)
#             loss = loss_fn(output, target)
#         # Scale the loss to amplify the gradients before backward
#         scaler.scale(loss).backward()
#         # scaler.step() first unscales the gradients; if none of them are
#         # infs or NaNs, optimizer.step() is called to update the weights.
#         # Otherwise the step is skipped, so the weights are not corrupted.
#         scaler.step(optimizer)
#         # Decide whether the scale factor should grow for the next iteration
#         scaler.update()
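
# A minimal runnable sketch of the pattern above, assuming a CUDA-capable GPU
# (torch.cuda.amp's autocast and GradScaler fall back to no-ops on CPU). The
# name amp_train_epoch is illustrative, not a library API; in multi-epoch
# training, create the scaler once and reuse it across epochs.
from torch.cuda.amp import GradScaler


def amp_train_epoch(model, loader, optimizer, scaler, device="cuda"):
    model.train()
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Forward pass (model + loss) under autocast: eligible ops run in fp16
        with autocast():
            output = model(data)
            loss = F.nll_loss(output, target)
        # Backward on the scaled loss so small fp16 gradients do not underflow
        scaler.scale(loss).backward()
        # Unscale the gradients, then step only if they are all finite
        scaler.step(optimizer)
        # Adjust the scale factor for the next iteration
        scaler.update()


# Example usage:
# if torch.cuda.is_available():
#     gpu_model = Net().cuda()
#     sgd = optim.SGD(gpu_model.parameters(), lr=0.0005)
#     amp_train_epoch(gpu_model, train_loader, sgd, GradScaler())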