Merge 019eaab4c9 into fcd9a236b0

Update template to 0.7.69 (#11719 )
Add warning for old pytorch. (#11718 )
2026-01-26 14:20:27 +08:00 · 2026-01-08 06:03:29 +03:00 · 2026-01-07 18:22:23 -08:00 · 2026-01-07 21:07:26 -05:00 · 2026-01-07 21:01:16 -05:00 · 2026-01-07 20:11:22 -05:00
12 changed files with 146 additions and 38 deletions
--- a/.github/workflows/stable-release.yml
+++ b/.github/workflows/stable-release.yml
@ -117,7 +117,7 @@ jobs:
          ./python.exe get-pip.py
          ./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*

-          grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
+          grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
          ./python.exe -s -m pip install -r requirements_comfyui.txt
          rm requirements_comfyui.txt

--- a/app/user_manager.py
+++ b/app/user_manager.py
@ -377,8 +377,22 @@ class UserManager():
            try:
                body = await request.read()

-                with open(path, "wb") as f:
-                    f.write(body)
+                # Pretty print JSON files for better source control
+                if path.lower().endswith('.json'):
+                    try:
+                        # Parse JSON and re-serialize with indentation
+                        json_data = json.loads(body.decode('utf-8'))
+                        formatted_json = json.dumps(json_data, indent=2)
+                        with open(path, "w", encoding='utf-8') as f:
+                            f.write(formatted_json)
+                    except (json.JSONDecodeError, UnicodeDecodeError):
+                        # If JSON parsing fails, save as-is
+                        with open(path, "wb") as f:
+                            f.write(body)
+                else:
+                    # Non-JSON files are saved as-is
+                    with open(path, "wb") as f:
+                        f.write(body)
            except OSError as e:
                logging.warning(f"Error saving file '{path}': {e}")
                return web.Response(
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -718,6 +718,7 @@ class ModelPatcher:
                            continue

                cast_weight = self.force_cast_weights
+                m.comfy_force_cast_weights = self.force_cast_weights
                if lowvram_weight:
                    if hasattr(m, "comfy_cast_weights"):
                        m.weight_function = []
@ -790,11 +791,12 @@ class ModelPatcher:
                for param in params:
                    self.pin_weight_to_device("{}.{}".format(n, param))

+            usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
            if lowvram_counter > 0:
-                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
                self.model.model_lowvram = True
            else:
-                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
                self.model.model_lowvram = False
                if full_load:
                    self.model.to(device_to)
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -654,29 +654,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                run_every_op()

                input_shape = input.shape
-                tensor_3d = input.ndim == 3
-
-                if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
-                    return self.forward_comfy_cast_weights(input, *args, **kwargs)
+                reshaped_3d = False

                if (getattr(self, 'layout_type', None) is not None and
-                    not isinstance(input, QuantizedTensor)):
+                    not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
+                    not getattr(self, 'comfy_force_cast_weights', False) and
+                    len(self.weight_function) == 0 and len(self.bias_function) == 0):

                    # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-                    if tensor_3d:
-                        input = input.reshape(-1, input_shape[2])
+                    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input

-                    if input.ndim != 2:
-                        # Fall back to comfy_cast_weights for non-2D tensors
-                        return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)
+                    # Fall back to non-quantized for non-2D tensors
+                    if input_reshaped.ndim == 2:
+                        reshaped_3d = input.ndim == 3
+                        # dtype is now implicit in the layout class
+                        scale = getattr(self, 'input_scale', None)
+                        if scale is not None:
+                            scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                        input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-                    # dtype is now implicit in the layout class
-                    input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
-
-                output = self._forward(input, self.weight, self.bias)
+                output = self.forward_comfy_cast_weights(input)

                # Reshape output back to 3D if input was 3D
-                if tensor_3d:
+                if reshaped_3d:
                    output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))

                return output
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -19,6 +19,7 @@ try:
        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
        if cuda_version < (13,):
            ck.registry.disable("cuda")
+            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -218,7 +218,7 @@ class CLIP:
            if unprojected:
                self.cond_stage_model.set_clip_options({"projected_pooled": False})

-            self.load_model()
+            self.load_model(tokens)
            self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
            all_hooks.reset()
            self.patcher.patch_hooks(None)
@ -266,7 +266,7 @@ class CLIP:
        if return_pooled == "unprojected":
            self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
        o = self.cond_stage_model.encode_token_weights(tokens)
        cond, pooled = o[:2]
@ -299,8 +299,11 @@ class CLIP:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

-    def load_model(self):
-        model_management.load_model_gpu(self.patcher)
+    def load_model(self, tokens={}):
+        memory_used = 0
+        if hasattr(self.cond_stage_model, "memory_estimation_function"):
+            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
        return self.patcher

    def get_key_patches(self):
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@ -845,7 +845,7 @@ class LTXAV(LTXV):

    def __init__(self, unet_config):
        super().__init__(unet_config)
-        self.memory_usage_factor = 0.055  # TODO
+        self.memory_usage_factor = 0.061  # TODO

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.LTXAV(self, device=device)
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -36,10 +36,10 @@ class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):

 class Gemma3_12BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
-        llama_scaled_fp8 = model_options.get("gemma_scaled_fp8", None)
-        if llama_scaled_fp8 is not None:
+        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
+        if llama_quantization_metadata is not None:
            model_options = model_options.copy()
-            model_options["scaled_fp8"] = llama_scaled_fp8
+            model_options["quantization_metadata"] = llama_quantization_metadata

        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

@ -98,10 +98,13 @@ class LTXAVTEModel(torch.nn.Module):

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
        out_device = out.device
+        if comfy.model_management.should_use_bf16(self.execution_device):
+            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
        out = out.movedim(1, -1).to(self.execution_device)
        out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
+        out = out.float()
        out_vid = self.video_embeddings_connector(out)[0]
        out_audio = self.audio_embeddings_connector(out)[0]
        out = torch.concat((out_vid, out_audio), dim=-1)
@ -118,13 +121,21 @@ class LTXAVTEModel(torch.nn.Module):

            return self.load_state_dict(sdo, strict=False)

+    def memory_estimation_function(self, token_weight_pairs, device=None):
+        constant = 6.0
+        if comfy.model_management.should_use_bf16(device):
+            constant /= 2.0

-def ltxav_te(dtype_llama=None, llama_scaled_fp8=None):
+        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        return num_tokens * constant * 1024 * 1024
+
+def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
    class LTXAVTEModel_(LTXAVTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options:
+            if llama_quantization_metadata is not None:
                model_options = model_options.copy()
-                model_options["llama_scaled_fp8"] = llama_scaled_fp8
+                model_options["llama_quantization_metadata"] = llama_quantization_metadata
            if dtype_llama is not None:
                dtype = dtype_llama
            super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
--- a/comfy_extras/nodes_lt_audio.py
+++ b/comfy_extras/nodes_lt_audio.py
@ -185,6 +185,10 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
                io.Combo.Input(
                    "ckpt_name",
                    options=folder_paths.get_filename_list("checkpoints"),
+                ),
+                io.Combo.Input(
+                    "device",
+                    options=["default", "cpu"],
                )
            ],
            outputs=[io.Clip.Output()],
@ -197,7 +201,11 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
        clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
+        model_options = {}
+        if device == "cpu":
+            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
+
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
        return io.NodeOutput(clip)


--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.67
+comfyui-workflow-templates==0.7.69
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.3
+comfy-kitchen>=0.2.5

 #non essential dependencies:
 kornia>=0.7.1
--- a/server.py
+++ b/server.py
@ -518,7 +518,7 @@ class PromptServer():
                            buffer.seek(0)

                            return web.Response(body=buffer.read(), content_type=f'image/{image_format}',
-                                                headers={"Content-Disposition": f"filename=\"{filename}\""})
+                                                headers={"Content-Disposition": f"attachment; filename=\"{filename}\""})

                    if 'channel' not in request.rel_url.query:
                        channel = 'rgba'
@ -538,7 +538,7 @@ class PromptServer():
                            buffer.seek(0)

                            return web.Response(body=buffer.read(), content_type='image/png',
-                                                headers={"Content-Disposition": f"filename=\"{filename}\""})
+                                                headers={"Content-Disposition": f"attachment; filename=\"{filename}\""})

                    elif channel == 'a':
                        with Image.open(file) as img:
@ -555,7 +555,7 @@ class PromptServer():
                            alpha_buffer.seek(0)

                            return web.Response(body=alpha_buffer.read(), content_type='image/png',
-                                                headers={"Content-Disposition": f"filename=\"{filename}\""})
+                                                headers={"Content-Disposition": f"attachment; filename=\"{filename}\""})
                    else:
                        # Get content type from mimetype, defaulting to 'application/octet-stream'
                        content_type = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
@ -567,7 +567,7 @@ class PromptServer():
                        return web.FileResponse(
                            file,
                            headers={
-                                "Content-Disposition": f"filename=\"{filename}\"",
+                                "Content-Disposition": f"attachment; filename=\"{filename}\"",
                                "Content-Type": content_type
                            }
                        )
--- a/tests-unit/prompt_server_test/user_manager_test.py
+++ b/tests-unit/prompt_server_test/user_manager_test.py
@ -287,3 +287,72 @@ async def test_listuserdata_v2_url_encoded_path(aiohttp_client, app, tmp_path):
    assert entry["name"] == "file.txt"
    # Ensure the path is correctly decoded and uses forward slash
    assert entry["path"] == "my dir/file.txt"
+
+
+async def test_post_userdata_json_pretty_print(aiohttp_client, app, tmp_path):
+    """Test that JSON files are saved with pretty printing (indentation)"""
+    import json
+
+    client = await aiohttp_client(app)
+
+    # Create a compact JSON workflow
+    workflow_data = {
+        "nodes": [
+            {"id": "1", "type": "LoadImage", "inputs": {"image": "test.png"}},
+            {"id": "2", "type": "SaveImage", "inputs": {"images": ["1", 0]}}
+        ],
+        "metadata": {"version": "1.0", "author": "test"}
+    }
+    compact_json = json.dumps(workflow_data).encode('utf-8')
+
+    # Save as JSON file
+    resp = await client.post("/userdata/workflow.json", data=compact_json)
+    assert resp.status == 200
+
+    # Read the saved file and verify it's pretty-printed
+    with open(tmp_path / "workflow.json", "r", encoding='utf-8') as f:
+        saved_content = f.read()
+
+    # Verify the file contains indentation (pretty-printed)
+    assert "  " in saved_content  # Should have 2-space indentation
+    assert "\n" in saved_content  # Should have newlines
+
+    # Verify the content is still valid JSON and matches original data
+    saved_data = json.loads(saved_content)
+    assert saved_data == workflow_data
+
+    # Verify it's actually formatted (not compact)
+    # Compact JSON would be much shorter
+    assert len(saved_content) > len(compact_json)
+
+
+async def test_post_userdata_json_invalid_fallback(aiohttp_client, app, tmp_path):
+    """Test that invalid JSON is saved as-is without error"""
+    client = await aiohttp_client(app)
+
+    # Create invalid JSON content
+    invalid_json = b'{"invalid": json content}'
+
+    # Save as JSON file - should not fail
+    resp = await client.post("/userdata/invalid.json", data=invalid_json)
+    assert resp.status == 200
+
+    # Verify file was saved as-is
+    with open(tmp_path / "invalid.json", "rb") as f:
+        assert f.read() == invalid_json
+
+
+async def test_post_userdata_non_json_unchanged(aiohttp_client, app, tmp_path):
+    """Test that non-JSON files are saved unchanged"""
+    client = await aiohttp_client(app)
+
+    # Create binary content
+    binary_content = b'\x00\x01\x02\x03\x04\x05'
+
+    # Save as non-JSON file
+    resp = await client.post("/userdata/test.bin", data=binary_content)
+    assert resp.status == 200
+
+    # Verify file was saved exactly as-is
+    with open(tmp_path / "test.bin", "rb") as f:
+        assert f.read() == binary_content
Author	SHA1	Message	Date
Sam Pullara	fb83448eee	Merge `019eaab4c9` into `fcd9a236b0`	2026-01-08 06:03:29 +03:00
ComfyUI Wiki	fcd9a236b0	Update template to 0.7.69 (#11719 )	2026-01-07 18:22:23 -08:00
comfyanonymous	21e8425087	Add warning for old pytorch. (#11718 )	2026-01-07 21:07:26 -05:00
rattus	b6c79a648a	ops: Fix offloading with FP8MM performance (#11697 ) This logic was checking comfy_cast_weights, and going straight to the forward_comfy_cast_weights implementation without attempting to downscale input to fp8 in the event comfy_cast_weights is set. The main reason comfy_cast_weights would be set would be for async offload, which is not a good reason to nix FP8MM. So instead, AND together the underlying exclusions for FP8MM, which are: * having a weight_function (usually LowVramPatch) * force_cast_weights (compute dtype override) * the weight is not Quantized * the input is already quantized * the model or layer has MM explicitly disabled. If you get past all of those exclusions, quantize the input tensor. Then hand the new input, quantized or not, off to forward_comfy_cast_weights to handle it. If the weight is offloaded but input is quantized you will get an offloaded MM8.	2026-01-07 21:01:16 -05:00
comfyanonymous	25bc1b5b57	Add memory estimation function to ltxav text encoder. (#11716 )	2026-01-07 20:11:22 -05:00
comfyanonymous	3cd19e99c1	Increase ltxav mem estimation by a bit. (#11715 )	2026-01-07 20:04:56 -05:00
comfyanonymous	007b87e7ac	Bump required comfy-kitchen version. (#11714 )	2026-01-07 19:48:47 -05:00
comfyanonymous	34751fe9f9	Lower ltxv text encoder vram use. (#11713 )	2026-01-07 19:12:15 -05:00
Jukka Seppänen	1c705f7bfb	Add device selection for LTXAVTextEncoderLoader (#11700 )	2026-01-07 18:39:59 -05:00
rattus	48e5ea1dfd	model_patcher: Remove confusing load stat (#11710 ) If the loader passes 1e32 as the usable memory size, it means force the full load. This happens with CPU loads and a few other misc cases. Removing the confusing number and just leave the other details.	2026-01-07 18:39:20 -05:00
comfyanonymous	3cd7b32f1b	Support gemma 12B with quant weights. (#11696 )	2026-01-07 05:15:14 -05:00
comfyanonymous	c0c9720d77	Fix stable release workflow not pulling latest comfy kitchen. (#11695 )	2026-01-07 04:48:28 -05:00
Sam Pullara	019eaab4c9	Merge branch 'comfyanonymous:master' into master	2026-01-06 14:24:31 -08:00
Sam Pullara	f330220f66	Merge branch 'comfyanonymous:master' into master	2025-12-15 09:37:39 -08:00
Sam Pullara	d35e0fcdd7	Merge branch 'comfyanonymous:master' into master	2025-12-01 12:41:36 -08:00
Sam Pullara	3d0331813d	Merge branch 'comfyanonymous:master' into master	2025-11-26 15:18:24 -08:00
Sam Pullara	afd4b725db	Merge pull request #1 from spullara/claude/fix-download-file-extension-01KxyJS9CQduLtV75QhZiPsT Fix file download issue - add attachment disposition type to Content-…	2025-11-19 16:46:45 -08:00
Claude	149506beea	Fix file download issue - add attachment disposition type to Content-Disposition headers Files were downloading with filename "view" instead of the actual filename because the Content-Disposition header was missing the disposition type (attachment/inline). Changed from `filename="..."` to `attachment; filename="..."` in all 4 locations in the /view endpoint to ensure proper filename handling by browsers. This fixes downloads for videos, audio, and other file types served through the /view endpoint.	2025-11-20 00:40:19 +00:00
Sam Pullara	dfca61be7f	Merge branch 'comfyanonymous:master' into master	2025-11-19 13:33:33 -08:00
Sam Pullara	39a5c5621e	Merge branch 'comfyanonymous:master' into master	2025-11-12 15:06:53 -08:00
Sam Pullara	0d20e44618	Merge branch 'comfyanonymous:master' into master	2025-10-31 13:41:24 -07:00
Sam Pullara	5f415089fc	Merge branch 'comfyanonymous:master' into master	2025-10-30 15:19:16 -07:00
Sam Pullara	6d23bfde7f	add tests for saving json files formatted nicely	2025-10-29 13:26:49 -07:00
Sam Pullara	0eff10fd21	store json files pretty printed for better source control compatibility	2025-10-29 13:17:56 -07:00