Update readme.
parent 29fb562aea
commit 235f65aa19
Readme.md | 41

@@ -28,6 +28,19 @@ response = tokenizer.decode(outputs)

## RMSNorm
hidden_states -> [6, 1, 4096]
      /        \
     |       pow(2) -> [6, 1, 4096]
     |          |
     |        mean -> [6, 1, 1]
     |          ↓
     |       rsqrt( + eps) -> [6, 1, 1]
      \        /
        mul -> [6, 1, 4096]
          \      weight -> [4096]
           \    /
            mul -> [6, 1, 4096]

hidden_states -> [6, 1, 4096]                                        4096: hidden_size
variance = hidden_states.pow(2).mean(-1, keepdim=True) -> [6, 1, 1]
hidden_states = hidden_states * torch.rsqrt(variance + self.eps)     reciprocal square root

@@ -36,12 +49,40 @@ return (self.weight * hidden_states) -> [6, 1, 4096]
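
For reference, a minimal PyTorch sketch of the RMSNorm step traced above, through the final `self.weight * hidden_states` scaling. Shapes in the comments follow the [6, 1, 4096] example; the `hidden_size`/`eps` defaults are illustrative, not read from the model config.

```python
import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    # Sketch of the normalization traced above; hidden_size/eps defaults are illustrative.
    def __init__(self, hidden_size=4096, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, hidden_states):                                      # [6, 1, 4096]
        variance = hidden_states.pow(2).mean(-1, keepdim=True)             # [6, 1, 1]
        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)   # reciprocal square root
        return self.weight * hidden_states                                 # [6, 1, 4096]

# x = torch.randn(6, 1, 4096); RMSNorm()(x) keeps the [6, 1, 4096] shape
```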

## MLP

hidden_states -> [6, 1, 4096]
      Linear -> [6, 1, 27392]
       /      \
  chunk1     chunk0 -> [6, 1, 13696]
    |          |    \
    |          |   sigmoid
    |          |    /
    |         mul
     \        /
       mul -> [6, 1, 13696]
      Linear -> [6, 1, 4096]

Linear(hidden_states) no bias -> [6, 1, 27392]
silu(x) = [6, 1, 13696] * sigmoid([6, 1, 13696])
Linear(intermediate_parallel) no bias -> [6, 1, 4096]
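
Putting the two chunks together, a minimal sketch of the gated MLP traced above. The layer names (`dense_h_to_4h`, `dense_4h_to_h`) and default sizes are illustrative assumptions; the gating follows the chunk0-through-sigmoid pattern in the diagram.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    # One projection to 2 * ffn_hidden_size, split into two chunks, gated with SiLU, projected back.
    def __init__(self, hidden_size=4096, ffn_hidden_size=13696):
        super().__init__()
        self.dense_h_to_4h = nn.Linear(hidden_size, 2 * ffn_hidden_size, bias=False)
        self.dense_4h_to_h = nn.Linear(ffn_hidden_size, hidden_size, bias=False)

    def forward(self, hidden_states):               # [6, 1, 4096]
        x = self.dense_h_to_4h(hidden_states)       # [6, 1, 27392]
        chunk0, chunk1 = x.chunk(2, dim=-1)         # two [6, 1, 13696] halves
        x = F.silu(chunk0) * chunk1                 # silu(x) = x * sigmoid(x)
        return self.dense_4h_to_h(x)                # [6, 1, 4096]
```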

## self_attention

x -> [6, 1, 4096]
        Linear -> [6, 1, 4608]
          /       |       \
  [6, 1, 32, 128] <- q    k    v
          |       |        \
      pos_emb  pos_emb      \
         |        |          \
         |      expand     expand -> [6, 1, 32, 128]
          \      /             |
            dot                |
          softmax             /
              \              /
                dot -> [1, 32, 6, 128]
              Linear -> [6, 1, 4096]

hidden_states: [s, b, h]
mixed_x_layer = Linear(hidden_states) -> [6, 1, 4608]                4608: 4096+256+256
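
The 4608 = 4096 + 256 + 256 split suggests multi-query attention: 32 query heads of size 128 plus 2 key/value heads that are expanded to 32 before the dot products. Below is a minimal sketch under that assumption; rotary position embeddings and causal masking are omitted, and all names and defaults are illustrative rather than the model's exact source.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    # Multi-query attention: 32 query heads, 2 key/value heads, head_dim 128 -> qkv width 4608.
    def __init__(self, hidden_size=4096, num_heads=32, num_kv_heads=2, head_dim=128):
        super().__init__()
        self.num_heads, self.num_kv_heads, self.head_dim = num_heads, num_kv_heads, head_dim
        self.qkv = nn.Linear(hidden_size, (num_heads + 2 * num_kv_heads) * head_dim)   # -> [.., 4608]
        self.dense = nn.Linear(num_heads * head_dim, hidden_size, bias=False)          # -> [.., 4096]

    def forward(self, x):                                     # x: [s, b, h] = [6, 1, 4096]
        s, b, _ = x.shape
        mixed_x_layer = self.qkv(x)                           # [6, 1, 4608]
        q, k, v = mixed_x_layer.split(
            [self.num_heads * self.head_dim,                  # 4096
             self.num_kv_heads * self.head_dim,               # 256
             self.num_kv_heads * self.head_dim], dim=-1)      # 256
        q = q.view(s, b, self.num_heads, self.head_dim)       # [6, 1, 32, 128]
        k = k.view(s, b, self.num_kv_heads, self.head_dim)    # [6, 1, 2, 128]
        v = v.view(s, b, self.num_kv_heads, self.head_dim)    # [6, 1, 2, 128]
        # rotary position embedding would be applied to q and k here (omitted)
        repeat = self.num_heads // self.num_kv_heads
        k = k.repeat_interleave(repeat, dim=2)                # expand -> [6, 1, 32, 128]
        v = v.repeat_interleave(repeat, dim=2)                # expand -> [6, 1, 32, 128]
        q, k, v = (t.permute(1, 2, 0, 3) for t in (q, k, v))  # [b, heads, s, head_dim]
        scores = q @ k.transpose(-1, -2) / self.head_dim ** 0.5   # [1, 32, 6, 6], no causal mask here
        context = F.softmax(scores, dim=-1) @ v                   # [1, 32, 6, 128]
        context = context.permute(2, 0, 1, 3).reshape(s, b, -1)   # [6, 1, 4096]
        return self.dense(context)                                # [6, 1, 4096]
```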

@@ -23,9 +23,8 @@ init_kwargs["name_or_path"] = pretrained_model_name_or_path
tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)

-aa = tokenizer.build_chat_input("骉")
-ab = tokenizer.encode("骉")
-a = tokenizer.decode([236,173,140])
+a = tokenizer.encode("骉")
+b = tokenizer.decode([236,173,140])
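
For a quick sanity check of the encode/decode pair above, a hedged round-trip sketch; it loads the tokenizer through `AutoTokenizer` with `trust_remote_code` instead of constructing `ChatGLMTokenizer` directly, and the checkpoint id is illustrative.

```python
from transformers import AutoTokenizer

# Illustrative checkpoint id; substitute the local ChatGLM checkpoint used in this repo.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)

ids = tokenizer.encode("骉")    # rare character, handled via byte-level fallback pieces
text = tokenizer.decode(ids)    # should round-trip back to "骉"
print(ids, text)
```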