From 3d2ff85fc04b4f40d9b0109780b0fdcc1949cecd Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 10 Jun 2025 23:14:27 +0800
Subject: [PATCH] Update mnist.

---
 binary/mnist.py  | 18 +++++++++---------
 binary/readme.md |  3 ++-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/binary/mnist.py b/binary/mnist.py
index 7015044..a1ccf80 100644
--- a/binary/mnist.py
+++ b/binary/mnist.py
@@ -22,7 +22,7 @@ np.random.seed(1234)
 torch.cuda.manual_seed_all(1234)
 
 BS = 16
-LR = 0.001
+LR = 0.01
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
@@ -279,14 +279,14 @@ class SimpleLNN(nn.Module):
         print(self.lutg1.weight[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
         print("=============================")
         print("=============================")
-        print("self.lutg1.grad")
-        print(self.lutg1.weight.grad[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
-        print("=============================")
-        print("=============================")
-        print("self.lutg2")
-        print(self.lutg2.weight.detach().cpu().numpy())
-        print("=============================")
-        print("=============================")
+        # print("self.lutg1.grad")
+        # print(self.lutg1.weight.grad[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
+        # print("=============================")
+        # print("=============================")
+        # print("self.lutg2")
+        # print(self.lutg2.weight.detach().cpu().numpy())
+        # print("=============================")
+        # print("=============================")
 
 
 torch.autograd.set_detect_anomaly(True)

diff --git a/binary/readme.md b/binary/readme.md
index e6e4432..47ca668 100644
--- a/binary/readme.md
+++ b/binary/readme.md
@@ -36,4 +36,5 @@
 1. When the LUT does not compute over the convolution kernels, it converges more easily, but accuracy is no higher
 2. When the LUT does not compute over the convolution kernels, it is harder to converge, and accuracy is about the same
 * It seems only the AdamW optimizer can optimize these parameters; convergence is clearly visible
-* Binarizing the LUT output hurts accuracy, roughly 94->81
\ No newline at end of file
+* Binarizing the LUT output hurts accuracy, roughly 93->81
+* Initializing the LUT parameters to 1.0 converges very quickly; accuracy seems higher than with random init, roughly 81->93
\ No newline at end of file
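
Note (not part of the patch): the readme observations above, that all-ones LUT initialization converges faster and that binarizing the LUT output costs accuracy, can be illustrated with a minimal PyTorch sketch. TinyLUT and BinarizeSTE below are hypothetical stand-ins; the repo's actual lutg1/lutg2 layers live in binary/mnist.py and are not shown in this patch, and whether mnist.py binarizes via a straight-through estimator is an assumption here.

import torch
import torch.nn as nn

class BinarizeSTE(torch.autograd.Function):
    """Binarize on the forward pass; pass gradients through unchanged
    (straight-through estimator, one common way to keep a binarized
    output trainable -- an assumption, not confirmed by this patch)."""
    @staticmethod
    def forward(ctx, x):
        return (x > 0).float()

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output

class TinyLUT(nn.Module):
    """Hypothetical stand-in for the lutg1/lutg2 layers referenced above."""
    def __init__(self, entries, width, init_ones=True):
        super().__init__()
        # All-ones init (the "81->93" readme note) vs. random init.
        init = torch.ones(entries, width) if init_ones else torch.randn(entries, width)
        self.weight = nn.Parameter(init)

    def forward(self, idx):
        out = self.weight[idx]         # table lookup by integer index
        return BinarizeSTE.apply(out)  # binarized output (the "93->81" readme note)

lut = TinyLUT(entries=1024, width=8, init_ones=True)
opt = torch.optim.AdamW(lut.parameters(), lr=0.01)  # AdamW + LR=0.01, as in this patch
idx = torch.randint(0, 1024, (16,))                 # BS = 16, as in mnist.py
lut(idx).sum().backward()                           # toy loss, just to exercise the STE
opt.step()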