From aa2d3b96c4cd93de09fbf6062f00a9a4a39fbdc7 Mon Sep 17 00:00:00 2001
From: Colin
Date: Sun, 7 Jan 2024 21:43:02 +0800
Subject: [PATCH] Delete unused files.

---
 qwen/modeling_qwen.py | 137 +++---------------------------------------
 1 file changed, 9 insertions(+), 128 deletions(-)

diff --git a/qwen/modeling_qwen.py b/qwen/modeling_qwen.py
index 658fae5..19bde37 100644
--- a/qwen/modeling_qwen.py
+++ b/qwen/modeling_qwen.py
@@ -77,8 +77,6 @@ class QWenAttention(nn.Module):
             config.hidden_size, self.projection_size, bias=not config.no_bias
         )

-        self.is_fp32 = True
-
         self.use_dynamic_ntk = config.use_dynamic_ntk
         self.use_logn_attn = config.use_logn_attn

@@ -301,29 +299,6 @@ class QWenPreTrainedModel(PreTrainedModel):
     def __init__(self, *inputs, **kwargs):
         super().__init__(*inputs, **kwargs)

-    def _init_weights(self, module):
-        """Initialize the weights."""
-        if isinstance(module, nn.Linear):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
-            if module.bias is not None:
-                module.bias.data.zero_()
-        elif isinstance(module, nn.Embedding):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
-            if module.padding_idx is not None:
-                module.weight.data[module.padding_idx].zero_()
-        elif isinstance(module, RMSNorm):
-            module.weight.data.fill_(1.0)
-
-        for name, p in module.named_parameters():
-            if name == "c_proj.weight":
-                p.data.normal_(
-                    mean=0.0,
-                    std=(
-                        self.config.initializer_range
-                        / math.sqrt(2 * self.config.num_hidden_layers)
-                    ),
-                )
-

 class QWenModel(QWenPreTrainedModel):
     def __init__(self, config):
@@ -347,8 +322,6 @@ class QWenModel(QWenPreTrainedModel):
         dim = self.rotary_ndims if self.rotary_ndims is not None else config.kv_channels
         self.rotary_emb = RotaryEmbedding(dim, base=config.rotary_emb_base)

-        self.is_fp32 = True
-
         self.h = nn.ModuleList(
             [QWenBlock(config) for i in range(config.num_hidden_layers)]
         )
@@ -375,19 +348,13 @@ class QWenModel(QWenPreTrainedModel):
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.FloatTensor] = None,
         use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None
+        output_attentions: Optional[bool] = None
     ):
         output_attentions = (
             output_attentions
             if output_attentions is not None
             else self.config.output_attentions
         )
-        output_hidden_states = (
-            output_hidden_states
-            if output_hidden_states is not None
-            else self.config.output_hidden_states
-        )
         use_cache = use_cache if use_cache is not None else self.config.use_cache

         if input_ids is not None and inputs_embeds is not None:
@@ -408,8 +375,6 @@ class QWenModel(QWenPreTrainedModel):
             past_key_values = tuple([None] * len(self.h))

         if attention_mask is not None:
-            if batch_size <= 0:
-                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             attention_mask = attention_mask[:, None, None, :]
             attention_mask = attention_mask.to(dtype=self.dtype)
@@ -458,10 +423,8 @@ class QWenModel(QWenPreTrainedModel):
         presents = () if use_cache else None
         all_self_attentions = () if output_attentions else None
-        all_hidden_states = () if output_hidden_states else None
+        all_hidden_states = None
         for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):

-            if output_hidden_states:
-                all_hidden_states = all_hidden_states + (hidden_states,)

             outputs = block(
                 hidden_states,
@@ -486,10 +449,6 @@ class QWenModel(QWenPreTrainedModel):

         hidden_states = self.ln_f(hidden_states)
         hidden_states = hidden_states.view(output_shape)
-        # Add last hidden state
-        if output_hidden_states:
-            all_hidden_states = all_hidden_states + (hidden_states,)
-
         return BaseModelOutputWithPast(
             last_hidden_state=hidden_states,
             past_key_values=presents,
@@ -543,8 +502,7 @@ class QWenLMHeadModel(QWenPreTrainedModel):
         encoder_attention_mask: Optional[torch.FloatTensor] = None,
         labels: Optional[torch.LongTensor] = None,
         use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
+        output_attentions: Optional[bool] = None
     ) -> Union[Tuple, CausalLMOutputWithPast]:


@@ -557,8 +515,7 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             encoder_hidden_states=encoder_hidden_states,
             encoder_attention_mask=encoder_attention_mask,
             use_cache=use_cache,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states
+            output_attentions=output_attentions
         )
         hidden_states = transformer_outputs[0]

@@ -727,28 +684,6 @@ class QWenLMHeadModel(QWenPreTrainedModel):
         # 1. Handle `generation_config` and kwargs that might update it, and validate the `.generate()` call
         self._validate_model_class()

-        # priority: `generation_config` argument > `model.generation_config` (the default generation config)
-        if generation_config is None:
-            # legacy: users may modify the model configuration to control generation. To trigger this legacy behavior,
-            # two conditions must be met
-            # 1) the generation config must have been created from the model config (`_from_model_config` field);
-            # 2) the generation config must have seen no modification since its creation (the hash is the same).
-            if (
-                self.generation_config._from_model_config
-                and self.generation_config._original_object_hash
-                == hash(self.generation_config)
-            ):
-                new_generation_config = GenerationConfig.from_model_config(self.config)
-                if new_generation_config != self.generation_config:
-                    warnings.warn(
-                        "You have modified the pretrained model configuration to control generation. This is a"
-                        " deprecated strategy to control generation and will be removed soon, in a future version."
-                        " Please use and modify the model generation configuration (see"
-                        " https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )"
-                    )
-                    self.generation_config = new_generation_config
-            generation_config = self.generation_config
-        generation_config = copy.deepcopy(generation_config)
         model_kwargs = generation_config.update(
             **kwargs
         )
@@ -791,11 +726,8 @@ class QWenLMHeadModel(QWenPreTrainedModel):
         inputs_tensor, model_input_name, model_kwargs = self._prepare_model_inputs(
             inputs, generation_config.bos_token_id, model_kwargs
         )
-        batch_size = inputs_tensor.shape[0]
-
         # 4. Define other model kwargs
         model_kwargs["output_attentions"] = generation_config.output_attentions
-        model_kwargs["output_hidden_states"] = generation_config.output_hidden_states
         # decoder-only models with inputs_embeds forwarding must use caching (otherwise we can't detect whether we are
         # generating the first new token or not, and we only want to use the embeddings for the first new token)
         if not self.config.is_encoder_decoder and model_input_name == "inputs_embeds":
@@ -822,7 +754,6 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             )

         # 5. Prepare `input_ids` which will be used for auto-regressive generation
-
         input_ids = (
             inputs_tensor
             if model_input_name == "input_ids"
@@ -838,40 +769,13 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             kwargs.get("max_length") is None and generation_config.max_length is not None
         )
-        if generation_config.max_new_tokens is not None:
-            if not has_default_max_length and generation_config.max_length is not None:
-                logger.warning(
-                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
-                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
-                    "Please refer to the documentation for more information. "
-                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
-                )
-            generation_config.max_length = (
-                generation_config.max_new_tokens + input_ids_length
-            )
+        generation_config.max_length = (
+            generation_config.max_new_tokens + input_ids_length
+        )
         self._validate_generated_length(
             generation_config, input_ids_length, has_default_max_length
         )

-        # 7. determine generation mode
-        generation_mode = self._get_generation_mode(generation_config, assistant_model)
-
-        if streamer is not None and (generation_config.num_beams > 1):
-            raise ValueError(
-                "`streamer` cannot be used with beam search (yet!). Make sure that `num_beams` is set to 1."
-            )
-
-        if self.device.type != input_ids.device.type:
-            warnings.warn(
-                "You are calling .generate() with the `input_ids` being on a device type different"
-                f" than your model's device. `input_ids` is on {input_ids.device.type}, whereas the model"
-                f" is on {self.device.type}. You may experience unexpected behaviors or slower generation."
-                " Please make sure that you have put `input_ids` to the"
-                f" correct device by calling for example input_ids = input_ids.to('{self.device.type}') before"
-                " running `.generate()`.",
-                UserWarning,
-            )
-
         # 8. prepare distribution pre_processing samplers
         logits_processor = self._get_logits_processor(
             generation_config=generation_config,
@@ -925,7 +829,6 @@ class QWenLMHeadModel(QWenPreTrainedModel):
         pad_token_id: Optional[int] = None,
         eos_token_id: Optional[Union[int, List[int]]] = None,
         output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
         output_scores: Optional[bool] = None,
         synced_gpus: bool = False,
         streamer: Optional["BaseStreamer"] = None,
@@ -940,13 +843,7 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             if stopping_criteria is not None
             else StoppingCriteriaList()
         )
-        # if max_length is not None:
-        #     warnings.warn(
-        #         "`max_length` is deprecated in this function, use"
-        #         " `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.",
-        #         UserWarning,
-        #     )
-        #     stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length)
+
         logits_warper = (
             logits_warper if logits_warper is not None else LogitsProcessorList()
         )
@@ -977,11 +874,6 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             if output_attentions is not None
             else self.generation_config.output_attentions
         )
-        output_hidden_states = (
-            output_hidden_states
-            if output_hidden_states is not None
-            else self.generation_config.output_hidden_states
-        )

         # init attention / hidden states / scores tuples
         scores = None
@@ -1000,8 +892,7 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             # forward pass to get next token
             outputs = self(
                 **model_inputs,
-                output_attentions=output_attentions,
-                output_hidden_states=output_hidden_states,
+                output_attentions=output_attentions
             )

             next_token_logits = outputs.logits[:, -1, :]
@@ -1064,9 +955,6 @@ class RotaryEmbedding(torch.nn.Module):
         self.base = base
         inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
         self.register_buffer("inv_freq", inv_freq, persistent=False)
-        if importlib.util.find_spec("einops") is None:
-            raise RuntimeError("einops is required for Rotary Embedding")
-
         self._rotary_pos_emb_cache = None
         self._seq_len_cached = 0
         self._ntk_alpha_cached = 1.0
@@ -1110,14 +998,7 @@ def _rotate_half(x):


 def apply_rotary_pos_emb(t, freqs):
-    """Apply rotary embedding to the first rotary_dim of the iput
-    Arguments:
-      t (tensor(batch_size, seq_len, n_head, head_dim)):
-        the input embedding/hidden states
-      freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
-        the cached cos/sin position embeddings
-    """
     rot_dim = freqs[0].shape[-1]
     cos, sin = freqs
     t_float = t.float()