diff --git a/unsuper/Readme.md b/unsuper/Readme.md index ff7f156..d614bde 100644 --- a/unsuper/Readme.md +++ b/unsuper/Readme.md @@ -1,9 +1,35 @@ -1. 3x3的时候会有重复 - 1. 重复的权重,虽然权重看起来都一样,但是有稍微的不同,不是完全一样 - 2. 3x3太小了导致了样本的信噪比太低,大部分的样本切出来都是0 -2. 5x5的时候会有网格状重复 -3. 7x7的时候边框区域问题 1. 输入的信噪比 2. loss函数的设计 -3. grad信息的应用 \ No newline at end of file +3. grad信息的应用 + + +abs.cpu().detach().numpy() +array([[8.1206687e-02, 2.2388995e-05, 3.7080176e-02, 5.7033218e-02, + 1.7404296e-03, 7.6270252e-02, 5.9453689e-02, 4.0801242e-05]], + dtype=float32) +ratio_nor.cpu().detach().numpy() +array([[4.3121886e+00, 3.2778070e-07, 8.9907879e-01, 2.1270120e+00, + 1.9807382e-03, 3.8038602e+00, 2.3113825e+00, 1.0885816e-06]], + dtype=float32) + +都比较差的时候区分不开 + + +## 发现的问题 +1. 3x3的时候会有重复 + 1. 重复的权重,虽然权重看起来都一样,但是有稍微的不同,不是完全一样 + 2. 3x3太小了导致了样本的信噪比太低,大部分的样本切出来都是0 +2. 5x5的时候会有网格状重复 +3. 7x7的时候边框区域问题 + +## 发现的原因 +1. 几个卷积核都是重复的,网格状的 + 1. grad太大,learning rate 太大 + 2. grad太小,训练的epoch不合适 + +## 可能的策略 +1. 每个卷积核的改变权重(grad)能量守恒 +2. 卷积核的每个像素的权重都独立统计? + 1. 权重的reduce体现的是相互之间的可比性,关系 +3. 需要考虑梯度的绝对比值? \ No newline at end of file diff --git a/unsuper/minist.py b/unsuper/minist.py index 33e9158..78eb13b 100644 --- a/unsuper/minist.py +++ b/unsuper/minist.py @@ -20,8 +20,8 @@ np.random.seed(seed) random.seed(seed) -# device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -device = torch.device("cpu") +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# device = torch.device("cpu") # device = torch.device("mps") num_epochs = 1 @@ -110,11 +110,27 @@ model = ConvNet().to(device) model.train() # Train the model unsuper -epochs = 2 +epochs = 3 n_total_steps = len(train_loader) for epoch in range(epochs): for i, (images, labels) in enumerate(train_loader): images = images.to(device) + + # images = torch.ones((1, 1, 5, 5), device=device) + # type = random.randint(0, 10) + # if type == 0: + # rand = random.randint(0, 4) + # images[:, :, rand, :] = images[:, :, rand, :] * 0.5 + # if type == 1: + # rand = random.randint(0, 4) + # images[:, :, :, rand] = images[:, :, :, rand] * 0.5 + # if type == 2: + # images[:, :, 0, 0] = images[:, :, 0, 0] * 0.5 + # images[:, :, 1, 1] = images[:, :, 1, 1] * 0.5 + # images[:, :, 2, 2] = images[:, :, 2, 2] * 0.5 + # images[:, :, 3, 3] = images[:, :, 3, 3] * 0.5 + # images[:, :, 4, 4] = images[:, :, 4, 4] * 0.5 + outputs = model.forward_unsuper(images) outputs = outputs.permute(0, 2, 3, 1) # 64 8 24 24 -> 64 24 24 8 @@ -137,7 +153,9 @@ for epoch in range(epochs): model.conv1.weight.grad = None loss.backward() - model.conv1.weight.data = model.conv1.weight.data - model.conv1.weight.grad * 1000 + # if epoch >= (epochs - 1): + # continue + model.conv1.weight.data = model.conv1.weight.data - model.conv1.weight.grad * 0.001 model.conv1.weight.data = model.normal_conv1_weight() if (i + 1) % 100 == 0: