diff --git a/comfy/ops.py b/comfy/ops.py
index b5cd1d47e..7a9b4b84c 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -1151,7 +1151,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                     if param is None:
                         continue
                     p = fn(param)
-                    if p.is_inference():
+                    if (not torch.is_inference_mode_enabled()) and p.is_inference():
                         p = p.clone()
                     self.register_parameter(key, torch.nn.Parameter(p, requires_grad=False))
                 for key, buf in self._buffers.items():
diff --git a/comfy/text_encoders/ernie.py b/comfy/text_encoders/ernie.py
index 2c7df78fe..46d24d222 100644
--- a/comfy/text_encoders/ernie.py
+++ b/comfy/text_encoders/ernie.py
@@ -35,4 +35,4 @@ def te(dtype_llama=None, llama_quantization_metadata=None):
                 model_options = model_options.copy()
                 model_options["quantization_metadata"] = llama_quantization_metadata
             super().__init__(device=device, dtype=dtype, model_options=model_options)
-    return ErnieTEModel
+    return ErnieTEModel_
diff --git a/comfy_extras/nodes_textgen.py b/comfy_extras/nodes_textgen.py
index f1aeb63fa..eed26c582 100644
--- a/comfy_extras/nodes_textgen.py
+++ b/comfy_extras/nodes_textgen.py
@@ -35,6 +35,7 @@ class TextGenerate(io.ComfyNode):
                 io.Int.Input("max_length", default=256, min=1, max=2048),
                 io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"),
                 io.Boolean.Input("thinking", optional=True, default=False, tooltip="Operate in thinking mode if the model supports it."),
+                io.Boolean.Input("use_default_template", optional=True, default=True, tooltip="Use the built in system prompt/template if the model has one.", advanced=True),
             ],
             outputs=[
                 io.String.Output(display_name="generated_text"),
@@ -42,9 +43,9 @@ class TextGenerate(io.ComfyNode):
         )
 
     @classmethod
-    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False) -> io.NodeOutput:
+    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True) -> io.NodeOutput:
 
-        tokens = clip.tokenize(prompt, image=image, skip_template=False, min_length=1, thinking=thinking)
+        tokens = clip.tokenize(prompt, image=image, skip_template=not use_default_template, min_length=1, thinking=thinking)
 
         # Get sampling parameters from dynamic combo
         do_sample = sampling_mode.get("sampling_mode") == "on"