import json

import torch
from tools import show
from chatglm import ChatGLMTokenizer

pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
tokenizer_config_file = "./chatglm/tokenizer_config.json"

if tokenizer_config_file is not None:
    # Load the saved tokenizer configuration and rebuild the constructor kwargs,
    # pointing the vocab file at the local SentencePiece model.
    with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle:
        init_kwargs = json.load(tokenizer_config_handle)
    init_kwargs.pop("tokenizer_class", None)
    init_kwargs.pop("tokenizer_file", None)
    init_inputs = init_kwargs.pop("init_inputs", ())
    init_kwargs["vocab_file"] = "./chatglm/tokenizer.model"
    init_kwargs["added_tokens_file"] = None
    init_kwargs["special_tokens_map_file"] = None
    init_kwargs["tokenizer_file"] = None
    init_kwargs["name_or_path"] = pretrained_model_name_or_path

tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)

# Tokenize the rare character "骉": build_chat_input wraps it in the chat template,
# encode returns its token ids, and decode([236, 173, 140]) reassembles the character
# from what appear to be SentencePiece byte-fallback tokens.
aa = tokenizer.build_chat_input("骉")
ab = tokenizer.encode("骉")
a = tokenizer.decode([236, 173, 140])

# Dump the whole vocabulary (64798 ids for ChatGLM3) as "id : text" lines.
token = []
for i in range(64798):
    token.append(str(i) + " : " + tokenizer.decode(i))
show.DumpListToFile(token, "generated/token.log")
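
# For reference -- a minimal sketch, not part of the original script, showing why
# decode([236, 173, 140]) yields "骉". Assumption: SentencePiece byte-fallback
# tokens <0x00>..<0xFF> occupy ids 3..258 (the usual LLaMA-style layout), so a
# raw UTF-8 byte b maps to token id b + 3.
BYTE_TOKEN_OFFSET = 3  # assumed offset; verify against the actual tokenizer.model

utf8_bytes = "骉".encode("utf-8")  # b'\xe9\xaa\x89'
byte_token_ids = [b + BYTE_TOKEN_OFFSET for b in utf8_bytes]
print(list(utf8_bytes))    # [233, 170, 137]
print(byte_token_ids)      # [236, 173, 140] -- matches the ids passed to decode above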