Update mnist.

Colin 2025-06-10 23:14:27 +08:00
parent 6cb969ac3b
commit 3d2ff85fc0
2 changed files with 11 additions and 10 deletions

@@ -22,7 +22,7 @@ np.random.seed(1234)
 torch.cuda.manual_seed_all(1234)
 BS = 16
-LR = 0.001
+LR = 0.01
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
@@ -279,14 +279,14 @@ class SimpleLNN(nn.Module):
 print(self.lutg1.weight[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
 print("=============================")
 print("=============================")
-print("self.lutg1.grad")
-print(self.lutg1.weight.grad[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
-print("=============================")
-print("=============================")
-print("self.lutg2")
-print(self.lutg2.weight.detach().cpu().numpy())
-print("=============================")
-print("=============================")
+# print("self.lutg1.grad")
+# print(self.lutg1.weight.grad[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
+# print("=============================")
+# print("=============================")
+# print("self.lutg2")
+# print(self.lutg2.weight.detach().cpu().numpy())
+# print("=============================")
+# print("=============================")
 torch.autograd.set_detect_anomaly(True)
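This hunk comments out the lutg1/lutg2 weight and gradient dumps but keeps torch.autograd.set_detect_anomaly(True) on. As a self-contained illustration of what that flag buys during debugging (the NaN below is deliberate and not from the mnist script):

```python
import torch

torch.autograd.set_detect_anomaly(True)

x = torch.tensor([1.0, -1.0], requires_grad=True)
# sqrt(-1.0) is NaN, and its backward pass also produces NaN. With
# anomaly detection on, backward() raises a RuntimeError naming the
# offending op (SqrtBackward) instead of silently propagating NaN.
y = torch.sqrt(x).sum()
try:
    y.backward()
except RuntimeError as err:
    print("anomaly detected:", err)
```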

@@ -36,4 +36,5 @@
 1. When the LUT does not compute over the convolution kernels, it converges more easily, but accuracy is no higher
 2. When the LUT does not compute over the convolution kernels, it does not converge easily, and accuracy is about the same
 * It seems that only the AdamW optimizer makes the parameters converge noticeably
-* Binarizing the LUT output hurts accuracy, roughly 94->81
+* Binarizing the LUT output hurts accuracy, roughly 93->81
+* Initializing the LUT parameters to 1.0 converges very quickly and seems to reach higher accuracy than random init, roughly 81->93
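The last two notes read like a hard threshold wrapped in a straight-through estimator: binarizing the LUT output costs about twelve points of accuracy, and a constant 1.0 init recovers most of it. A sketch of that pattern follows; every name in it is hypothetical, since this diff does not show the actual LUT layer:

```python
import torch
import torch.nn as nn

class BinarizedLUT(nn.Module):
    """Hypothetical stand-in for the script's LUT layers (lutg1/lutg2)."""
    def __init__(self, size: int):
        super().__init__()
        # Constant 1.0 init, per the note above: converges much faster
        # than random init (roughly 81 -> 93 in their runs).
        self.weight = nn.Parameter(torch.ones(size))

    def forward(self, idx: torch.Tensor) -> torch.Tensor:
        w = self.weight[idx]
        hard = (w > 0).float()          # binarized 0/1 output
        # Straight-through estimator: forward returns the hard values,
        # backward treats the threshold as identity so w still gets grads.
        return hard + (w - w.detach())

lut = BinarizedLUT(1024)
out = lut(torch.tensor([1, 2, 4, 8]))
out.sum().backward()
print(out, lut.weight.grad[[1, 2, 4, 8]])
```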