Merge upstream/master, keep local README.md

GitHub Actions 2026-02-05 00:47:02 +00:00
commit f9efc0327e
11 changed files with 39 additions and 20 deletions

View File

@@ -1724,11 +1724,9 @@ def soft_empty_cache(force=False):
     elif is_mlu():
         torch.mlu.empty_cache()
     elif torch.cuda.is_available():
-        if comfy.memory_management.aimdo_allocator is None:
-            #Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist
-            torch.cuda.synchronize()
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
+        torch.cuda.synchronize()
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()

 def unload_all_models():
     free_memory(1e30, get_torch_device())
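
The guard removed here existed because PyTorch 2.7 and earlier could crash when torch.cuda.empty_cache() ran while custom CUDA mempools were alive; since this commit also makes DynamicVRAM require PyTorch 2.8 or later (see the main.py hunk below), the unconditional flush is safe again. A minimal sketch of the old defensive pattern, assuming a caller that knows whether mempools are active:

    import torch

    def safe_cuda_cache_flush(mempools_active: bool) -> None:
        # Hypothetical helper mirroring the removed guard: on PyTorch <= 2.7,
        # emptying the CUDA cache while custom mempools exist could crash.
        if not torch.cuda.is_available():
            return
        major, minor = (int(x) for x in torch.__version__.split(".")[:2])
        if mempools_active and (major, minor) < (2, 8):
            return  # skip the flush on older PyTorch rather than risk a crash
        torch.cuda.synchronize()
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()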

View File

@@ -1400,7 +1400,7 @@ class ModelPatcher:
                 continue
             key = "diffusion_model." + k
             unet_state_dict[k] = LazyCastingParam(self, key, comfy.utils.get_attr(self.model, key))
-        return self.model.state_dict_for_saving(unet_state_dict)
+        return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)

     def __del__(self):
         self.unpin_all_weights()
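
This one-line fix forwards the optional clip_state_dict, vae_state_dict and clip_vision_state_dict arguments instead of silently dropping them, so full-checkpoint saves no longer lose the CLIP, VAE and CLIP-vision weights. A hedged sketch of what the merge inside state_dict_for_saving might look like; the real implementation lives elsewhere in comfy and may differ:

    # Hypothetical sketch: merge optional component state dicts into one
    # checkpoint dict. The real state_dict_for_saving also remaps key prefixes.
    def state_dict_for_saving(unet_sd, clip_state_dict=None, vae_state_dict=None,
                              clip_vision_state_dict=None):
        out = dict(unet_sd)
        for sd in (clip_state_dict, vae_state_dict, clip_vision_state_dict):
            if sd is not None:
                out.update(sd)  # assumption: keys already carry their prefixes
        return out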

View File

@@ -54,6 +54,8 @@ try:
         SDPA_BACKEND_PRIORITY.insert(0, SDPBackend.CUDNN_ATTENTION)

         def scaled_dot_product_attention(q, k, v, *args, **kwargs):
+            if q.nelement() < 1024 * 128: # arbitrary number, for small inputs cudnn attention seems slower
+                return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs)
             with sdpa_kernel(SDPA_BACKEND_PRIORITY, set_priority=True):
                 return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs)
     else:
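
The new early return skips the cuDNN-first backend priority for small inputs, where cuDNN attention was observed to be slower. The cutoff counts total query elements: for example a query of shape (batch=1, heads=8, seq=256, head_dim=64) has 1*8*256*64 = 131072 = 1024*128 elements, exactly at the threshold. A standalone sketch of the same dispatch, assuming PyTorch 2.6+ for the set_priority keyword:

    import torch
    from torch.nn.attention import SDPBackend, sdpa_kernel

    SMALL_INPUT_CUTOFF = 1024 * 128  # arbitrary threshold, taken from the diff

    def sdpa_cudnn_preferred(q, k, v, **kwargs):
        # Small queries: let PyTorch pick its default backend.
        if q.nelement() < SMALL_INPUT_CUTOFF:
            return torch.nn.functional.scaled_dot_product_attention(q, k, v, **kwargs)
        # Large queries: ask for cuDNN attention first, with sane fallbacks.
        priority = [SDPBackend.CUDNN_ATTENTION, SDPBackend.FLASH_ATTENTION,
                    SDPBackend.EFFICIENT_ATTENTION, SDPBackend.MATH]
        with sdpa_kernel(priority, set_priority=True):
            return torch.nn.functional.scaled_dot_product_attention(q, k, v, **kwargs)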

View File

@@ -19,6 +19,7 @@ def sample_manual_loop_no_classes(
     min_tokens: int = 1,
     max_new_tokens: int = 2048,
     audio_start_id: int = 151669, # The cutoff ID for audio codes
+    audio_end_id: int = 215669,
     eos_token_id: int = 151645,
 ):
     device = model.execution_device
@@ -60,6 +61,7 @@ def sample_manual_loop_no_classes(
         remove_logit_value = torch.finfo(cfg_logits.dtype).min
         # Only generate audio tokens
         cfg_logits[:, :audio_start_id] = remove_logit_value
+        cfg_logits[:, audio_end_id:] = remove_logit_value
         if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
             cfg_logits[:, eos_token_id] = eos_score
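
With audio_end_id added, the two fills clamp sampling to the audio-code band: every logit below audio_start_id and at or above audio_end_id is pushed to the dtype minimum, and the EOS token, which sits below the band, is selectively re-enabled once enough tokens have been produced. A small sketch of the same banding with the default IDs from above (the real loop assigns a computed eos_score instead of restoring the original logit):

    import torch

    def mask_to_audio_band(logits, audio_start_id=151669, audio_end_id=215669,
                           eos_token_id=151645, allow_eos=False):
        # Everything outside [audio_start_id, audio_end_id) becomes unsampleable.
        floor = torch.finfo(logits.dtype).min
        eos_backup = logits[:, eos_token_id].clone()
        logits[:, :audio_start_id] = floor
        logits[:, audio_end_id:] = floor
        if allow_eos:  # EOS is below the band, so restore it explicitly
            logits[:, eos_token_id] = eos_backup
        return logits

    out = mask_to_audio_band(torch.randn(1, 220000), allow_eos=True)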

View File

@@ -651,10 +651,10 @@ class Llama2_(nn.Module):
         mask = None
         if attention_mask is not None:
             mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1])
-            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min)
+            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min / 4)

         if seq_len > 1:
-            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min).triu_(1)
+            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min / 4).triu_(1)
             if mask is not None:
                 mask += causal_mask
             else:
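
Dividing the fill value by 4 matters because the padding mask and the causal mask can be added together (mask += causal_mask): two entries that are each torch.finfo(dtype).min overflow to -inf in the summed mask, and a fully masked attention row then softmaxes to NaN. With min / 4, the worst-case sum is min / 2, which is still finite. A short check of the float16 case:

    import torch

    m = torch.finfo(torch.float16).min                 # -65504.0
    t = torch.tensor([m, m / 4], dtype=torch.float16)
    print(t[0] + t[0])  # -inf: min + min overflows float16
    print(t[1] + t[1])  # -32752.0: (min / 4) * 2 stays finite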

View File

@@ -82,14 +82,12 @@ _TYPES = {
 def load_safetensors(ckpt):
     f = open(ckpt, "rb")
     mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+    mv = memoryview(mapping)
     header_size = struct.unpack("<Q", mapping[:8])[0]
     header = json.loads(mapping[8:8+header_size].decode("utf-8"))
-    with warnings.catch_warnings():
-        #We are working with read-only RAM by design
-        warnings.filterwarnings("ignore", message="The given buffer is not writable")
-        data_area = torch.frombuffer(mapping, dtype=torch.uint8)[8 + header_size:]
+    mv = mv[8 + header_size:]

     sd = {}
     for name, info in header.items():
@@ -97,7 +95,13 @@ def load_safetensors(ckpt):
             continue
         start, end = info["data_offsets"]
-        sd[name] = data_area[start:end].view(_TYPES[info["dtype"]]).view(info["shape"])
+        if start == end:
+            sd[name] = torch.empty(info["shape"], dtype=_TYPES[info["dtype"]])
+        else:
+            with warnings.catch_warnings():
+                #We are working with read-only RAM by design
+                warnings.filterwarnings("ignore", message="The given buffer is not writable")
+                sd[name] = torch.frombuffer(mv[start:end], dtype=_TYPES[info["dtype"]]).view(info["shape"])

     return sd, header.get("__metadata__", {}),
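
Two behaviors change here. First, tensors whose data region is empty (start == end) are allocated with torch.empty, because torch.frombuffer cannot wrap a zero-length buffer. Second, each tensor now views its own memoryview slice of the mmap directly, instead of going through one big uint8 tensor and reinterpreting slices of it. A small sketch of both paths, assuming a read-only buffer like the mmap above:

    import warnings
    import torch

    raw = torch.arange(4, dtype=torch.float32).numpy().tobytes()
    mv = memoryview(raw)  # read-only, like mmap.ACCESS_READ

    with warnings.catch_warnings():
        # frombuffer warns on read-only buffers; harmless since we never write
        warnings.filterwarnings("ignore", message="The given buffer is not writable")
        t = torch.frombuffer(mv[0:16], dtype=torch.float32).view(2, 2)
    print(t)  # tensor([[0., 1.], [2., 3.]])

    empty = torch.empty((0, 16), dtype=torch.float32)  # the start == end fallback
    print(empty.shape)  # torch.Size([0, 16])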

View File

@@ -618,6 +618,7 @@ class SaveGLB(IO.ComfyNode):
     def define_schema(cls):
         return IO.Schema(
             node_id="SaveGLB",
+            display_name="Save 3D Model",
             search_aliases=["export 3d model", "save mesh"],
             category="3d",
             is_output_node=True,
@@ -626,8 +627,14 @@ class SaveGLB(IO.ComfyNode):
                 IO.Mesh.Input(
                     "mesh",
                     types=[
                         IO.File3DGLB,
+                        IO.File3DGLTF,
+                        IO.File3DOBJ,
+                        IO.File3DFBX,
+                        IO.File3DSTL,
+                        IO.File3DUSDZ,
+                        IO.File3DAny,
                     ],
-                    tooltip="Mesh or GLB file to save",
+                    tooltip="Mesh or 3D file to save",
                 ),
                 IO.String.Input("filename_prefix", default="mesh/ComfyUI"),
@@ -649,7 +656,8 @@ class SaveGLB(IO.ComfyNode):
         if isinstance(mesh, Types.File3D):
             # Handle File3D input - save BytesIO data to output folder
-            f = f"{filename}_{counter:05}_.glb"
+            ext = mesh.format or "glb"
+            f = f"{filename}_{counter:05}_.{ext}"
             mesh.save_to(os.path.join(full_output_folder, f))
             results.append({
                 "filename": f,

View File

@@ -45,6 +45,7 @@ class Load3D(IO.ComfyNode):
                 IO.Image.Output(display_name="normal"),
                 IO.Load3DCamera.Output(display_name="camera_info"),
                 IO.Video.Output(display_name="recording_video"),
+                IO.File3DAny.Output(display_name="model_3d"),
             ],
         )
@@ -66,7 +67,8 @@ class Load3D(IO.ComfyNode):
             video = InputImpl.VideoFromFile(recording_video_path)

-        return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video)
+        file_3d = Types.File3D(folder_paths.get_annotated_filepath(model_file))
+        return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video, file_3d)

     process = execute # TODO: remove

View File

@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.12.1"
+__version__ = "0.12.2"

View File

@@ -192,7 +192,10 @@ import comfy_aimdo.control
 import comfy_aimdo.torch

 if enables_dynamic_vram():
-    if comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
+    if comfy.model_management.torch_version_numeric < (2, 8):
+        logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        comfy.memory_management.aimdo_allocator = None
+    elif comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
         if args.verbose == 'DEBUG':
             comfy_aimdo.control.set_log_debug()
         elif args.verbose == 'CRITICAL':
@@ -208,7 +211,7 @@ if enables_dynamic_vram():
         comfy.memory_management.aimdo_allocator = comfy_aimdo.torch.get_torch_allocator()
         logging.info("DynamicVRAM support detected and enabled")
     else:
-        logging.info("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
         comfy.memory_management.aimdo_allocator = None
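
DynamicVRAM initialization now checks the PyTorch version before touching the aimdo allocator, and the no-install message is promoted from info to warning since falling back to the legacy ModelPatcher degrades VRAM estimation. A sketch of the kind of numeric version gate used here; torch_version_numeric is comfy's own helper, this just shows the idea:

    import re
    import torch

    def torch_version_numeric() -> tuple:
        # "2.7.1+cu126" -> (2, 7); local and CUDA suffixes are ignored
        m = re.match(r"(\d+)\.(\d+)", torch.__version__)
        return (int(m.group(1)), int(m.group(2))) if m else (0, 0)

    if torch_version_numeric() < (2, 8):
        print("DynamicVRAM requires PyTorch 2.8 or later; using legacy ModelPatcher")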

View File

@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.12.1"
+version = "0.12.2"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"