diff --git a/comfy/ops.py b/comfy/ops.py index b33fde1aa..9f9041e69 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -1237,7 +1237,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec input, qdata, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) uncast_bias_weight(self, qdata, None, offload_stream) - target_dtype = out_dtype if out_dtype is not None else weight.params.orig_dtype + target_dtype = out_dtype if out_dtype is not None else weight._params.orig_dtype x = x.to(dtype=target_dtype) if scale is not None and scale != 1.0: x = x * scale.to(dtype=target_dtype) diff --git a/comfy/text_encoders/gemma4.py b/comfy/text_encoders/gemma4.py index 9573cd427..8905f375f 100644 --- a/comfy/text_encoders/gemma4.py +++ b/comfy/text_encoders/gemma4.py @@ -1073,7 +1073,6 @@ class Gemma4_Tokenizer(): return np.maximum(np.zeros(1), np.minimum(down_slopes, up_slopes)) def tokenize_with_weights(self, text, return_word_ids=False, image=None, audio=None, video=None, llama_template=None, skip_template=True, thinking=False, **kwargs): - self.thinking = thinking # Process audio audio_features = [] @@ -1131,7 +1130,7 @@ class Gemma4_Tokenizer(): llama_text = llama_template.format(text) else: # Build template from modalities present - system = "<|turn>system\n<|think|>\n" if self.thinking else "" + system = "<|turn>system\n<|think|>\n" if thinking else "" media = "" if len(images) > 0: if is_video: