Update readme.

Colin 2023-12-26 18:14:11 +08:00
parent 29fb562aea
commit 235f65aa19
4 changed files with 43 additions and 3 deletions

@@ -28,6 +28,19 @@ response = tokenizer.decode(outputs)
## RMSNorm
hidden_states -> [6, 1, 4096]
     /    \
    |    pow(2) -> [6, 1, 4096]
    |      |
    |     mean -> [6, 1, 1]
    |      ↓
    |    rsqrt( + eps) -> [6, 1, 1]
     \    /
      mul -> [6, 1, 4096]
        \      weight -> [4096]
         \      /
           mul -> [6, 1, 4096]
hidden_states -> [6, 1, 4096]  4096:hidden_size
variance = hidden_states.pow(2).mean(-1, keepdim=True) -> [6, 1, 1]
hidden_states = hidden_states * torch.rsqrt(variance + self.eps)  # reciprocal of the square root
@@ -36,12 +49,40 @@ return (self.weight * hidden_states) -> [6, 1, 4096]
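Putting the pieces together, a minimal PyTorch sketch of the flow above (the `hidden_size`/`eps` defaults here are illustrative, not read from the model config):

```python
import torch
from torch import nn

class RMSNorm(nn.Module):
    # x * rsqrt(mean(x^2, dim=-1) + eps) * weight, as in the diagram above
    def __init__(self, hidden_size=4096, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, hidden_states):
        # variance over the hidden dim: [6, 1, 4096] -> [6, 1, 1]
        variance = hidden_states.pow(2).mean(-1, keepdim=True)
        # scale by the reciprocal square root, broadcast back to [6, 1, 4096]
        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
        return self.weight * hidden_states  # [6, 1, 4096]

# x = torch.randn(6, 1, 4096); RMSNorm()(x).shape -> torch.Size([6, 1, 4096])
```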
## MLP
hidden_states -> [6, 1, 4096]
      Linear -> [6, 1, 27392]
       /      \
  chunk1     chunk0 -> [6, 1, 13696]
     |         |    \
     |         |   sigmoid
     |         |    /
     |        mul
      \       /
        mul -> [6, 1, 13696]
      Linear -> [6, 1, 4096]
Linear(hidden_states) no bias -> [6, 1, 27392]
silu(x) = [6, 1, 13696] * sigmoid([6, 1, 13696])
Linear(intermediate_parallel) no bias -> [6, 1, 4096]
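A minimal PyTorch sketch of this gated MLP (layer names are illustrative; the fused no-bias projection and the chunk/silu ordering follow the shapes above):

```python
import torch
from torch import nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, hidden_size=4096, ffn_hidden_size=13696):
        super().__init__()
        # fused up-projection: 2 * 13696 = 27392 output features, no bias
        self.up_proj = nn.Linear(hidden_size, 2 * ffn_hidden_size, bias=False)
        self.down_proj = nn.Linear(ffn_hidden_size, hidden_size, bias=False)

    def forward(self, hidden_states):
        x = self.up_proj(hidden_states)        # [6, 1, 27392]
        x0, x1 = torch.chunk(x, 2, dim=-1)     # two [6, 1, 13696] halves
        x = F.silu(x0) * x1                    # silu(x0) = x0 * sigmoid(x0)
        return self.down_proj(x)               # [6, 1, 4096]

# MLP()(torch.randn(6, 1, 4096)).shape -> torch.Size([6, 1, 4096])
```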
## self_attention
       x -> [6, 1, 4096]
            |
         Linear -> [6, 1, 4608]
          /     |     \
[6, 1, 32, 128] <- q    k    v
         /      |      \
    pos_emb   pos_emb    \
       |        |         \
       |      expand     expand -> [6, 1, 32, 128]
        \      /            |
         dot                |
       softmax             /
           \              /
            dot -> [1, 32, 6, 128]
          Linear -> [6, 1, 4096]
hidden_states: [s, b, h]
mixed_x_layer = Linear(hidden_states) -> [6, 1, 4608]  4608:4096+256+256
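A minimal PyTorch sketch of this flow: 4608 = 32×128 (q) + 2×128 (k) + 2×128 (v), so the 2 key/value heads are expanded to match the 32 query heads. Rotary `pos_emb` and the causal mask are omitted, and the module/parameter names are illustrative:

```python
import torch
from torch import nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    def __init__(self, hidden_size=4096, num_heads=32, num_kv_heads=2, head_dim=128):
        super().__init__()
        self.num_heads, self.num_kv_heads, self.head_dim = num_heads, num_kv_heads, head_dim
        qkv_size = (num_heads + 2 * num_kv_heads) * head_dim   # 4096 + 256 + 256 = 4608
        self.query_key_value = nn.Linear(hidden_size, qkv_size, bias=True)
        self.dense = nn.Linear(num_heads * head_dim, hidden_size, bias=False)

    def forward(self, x):                            # x: [s, b, h] = [6, 1, 4096]
        s, b, _ = x.shape
        mixed_x_layer = self.query_key_value(x)      # [6, 1, 4608]
        q, k, v = mixed_x_layer.split(
            [self.num_heads * self.head_dim,
             self.num_kv_heads * self.head_dim,
             self.num_kv_heads * self.head_dim], dim=-1)
        q = q.view(s, b, self.num_heads, self.head_dim)      # [6, 1, 32, 128]
        k = k.view(s, b, self.num_kv_heads, self.head_dim)   # [6, 1, 2, 128]
        v = v.view(s, b, self.num_kv_heads, self.head_dim)
        # rotary pos_emb would be applied to q and k here (omitted)
        # expand the kv heads so every query head has a match -> [6, 1, 32, 128]
        k = k.repeat_interleave(self.num_heads // self.num_kv_heads, dim=2)
        v = v.repeat_interleave(self.num_heads // self.num_kv_heads, dim=2)
        q, k, v = (t.permute(1, 2, 0, 3) for t in (q, k, v))  # -> [b, heads, s, head_dim]
        ctx = F.scaled_dot_product_attention(q, k, v)         # [1, 32, 6, 128]
        ctx = ctx.permute(2, 0, 1, 3).reshape(s, b, -1)       # [6, 1, 4096]
        return self.dense(ctx)                                # [6, 1, 4096]
```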

(binary image file updated; before/after size 2.4 KiB)

(binary image file updated; before/after size 262 KiB)

@@ -23,9 +23,8 @@ init_kwargs["name_or_path"] = pretrained_model_name_or_path
tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)
-aa = tokenizer.build_chat_input("")
-ab = tokenizer.encode("")
-a = tokenizer.decode([236,173,140])
+a = tokenizer.encode("")
+b = tokenizer.decode([236,173,140])
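For reference, a sketch of the same encode/decode round-trip against a published ChatGLM checkpoint (the model name and input string are assumptions, not part of this repository):

```python
from transformers import AutoTokenizer

# any ChatGLMTokenizer-backed checkpoint works the same way; this one is an assumption
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)

ids = tokenizer.encode("hello")   # text -> token ids
text = tokenizer.decode(ids)      # token ids -> text
print(ids, text)
```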