From 3d2ff85fc04b4f40d9b0109780b0fdcc1949cecd Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 10 Jun 2025 23:14:27 +0800
Subject: [PATCH] Update mnist.

---
 binary/mnist.py  | 18 +++++++++---------
 binary/readme.md |  3 ++-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/binary/mnist.py b/binary/mnist.py
index 7015044..a1ccf80 100644
--- a/binary/mnist.py
+++ b/binary/mnist.py
@@ -22,7 +22,7 @@ np.random.seed(1234)
 torch.cuda.manual_seed_all(1234)
 
 BS = 16
-LR = 0.001
+LR = 0.01
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
@@ -279,14 +279,14 @@ class SimpleLNN(nn.Module):
         print(self.lutg1.weight[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
         print("=============================")
         print("=============================")
-        print("self.lutg1.grad")
-        print(self.lutg1.weight.grad[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
-        print("=============================")
-        print("=============================")
-        print("self.lutg2")
-        print(self.lutg2.weight.detach().cpu().numpy())
-        print("=============================")
-        print("=============================")
+        # print("self.lutg1.grad")
+        # print(self.lutg1.weight.grad[[1, 2, 4, 8, 16, 32, 64, 128, 256, 512], :].detach().cpu().numpy())
+        # print("=============================")
+        # print("=============================")
+        # print("self.lutg2")
+        # print(self.lutg2.weight.detach().cpu().numpy())
+        # print("=============================")
+        # print("=============================")
 
 
 torch.autograd.set_detect_anomaly(True)

diff --git a/binary/readme.md b/binary/readme.md
index e6e4432..47ca668 100644
--- a/binary/readme.md
+++ b/binary/readme.md
@@ -36,4 +36,5 @@
 1. When the LUT does not compute over the convolution kernels, it converges more easily, but accuracy is no higher
 2. When the LUT does not compute over the convolution kernels, it is harder to converge, and accuracy is about the same
 * It seems only the AdamW optimizer can optimize these parameters; convergence is clearly visible
-* Binarizing the LUT output hurts accuracy, roughly 94->81
\ No newline at end of file
+* Binarizing the LUT output hurts accuracy, roughly 93->81
+* Initializing the LUT parameters to 1.0 converges very quickly; accuracy seems higher than with random init, roughly 81->93
\ No newline at end of file
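
Note (not part of the patch): the readme observations above, that all-ones LUT initialization converges faster and that binarizing the LUT output costs accuracy, can be illustrated with a minimal PyTorch sketch. TinyLUT and BinarizeSTE below are hypothetical stand-ins; the repo's actual lutg1/lutg2 layers live in binary/mnist.py and are not shown in this patch, and whether mnist.py binarizes via a straight-through estimator is an assumption here.

import torch
import torch.nn as nn

class BinarizeSTE(torch.autograd.Function):
    """Binarize on the forward pass; pass gradients through unchanged
    (straight-through estimator, one common way to keep a binarized
    output trainable -- an assumption, not confirmed by this patch)."""
    @staticmethod
    def forward(ctx, x):
        return (x > 0).float()

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output

class TinyLUT(nn.Module):
    """Hypothetical stand-in for the lutg1/lutg2 layers referenced above."""
    def __init__(self, entries, width, init_ones=True):
        super().__init__()
        # All-ones init (the "81->93" readme note) vs. random init.
        init = torch.ones(entries, width) if init_ones else torch.randn(entries, width)
        self.weight = nn.Parameter(init)

    def forward(self, idx):
        out = self.weight[idx]         # table lookup by integer index
        return BinarizeSTE.apply(out)  # binarized output (the "93->81" readme note)

lut = TinyLUT(entries=1024, width=8, init_ones=True)
opt = torch.optim.AdamW(lut.parameters(), lr=0.01)  # AdamW + LR=0.01, as in this patch
idx = torch.randint(0, 1024, (16,))                 # BS = 16, as in mnist.py
lut(idx).sum().backward()                           # toy loss, just to exercise the STE
opt.step()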