Merge branch 'master' into fix/mac-upscale-fail

2026-06-11 00:37:53 +08:00 · 2026-02-23 16:45:35 +09:00 · 2026-02-23 16:45:35 +09:00 · be61a53470
commit be61a53470
parent bd840817ac caa43d2395
15 changed files with 75 additions and 71 deletions
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@ -1,6 +1,7 @@
 # yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
 language: "en-US"
 early_access: false
+tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code."

 reviews:
  profile: "chill"
@ -35,6 +36,14 @@ reviews:
    - "!**/*.bat"

  path_instructions:
+    - path: "**"
+      instructions: |
+        IMPORTANT: Only comment on issues directly introduced by this PR's code changes.
+        Do NOT flag pre-existing issues in code that was merely moved, re-indented,
+        de-indented, or reformatted without logic changes. If code appears in the diff
+        only due to whitespace or structural reformatting (e.g., removing a `with:` block),
+        treat it as unchanged. Contributors should not feel obligated to address
+        pre-existing issues outside the scope of their contribution.
    - path: "comfy/**"
      instructions: |
        Core ML/diffusion engine. Focus on:
@ -74,7 +83,11 @@ reviews:
  auto_review:
    enabled: true
    auto_incremental_review: true
-    drafts: true
+    drafts: false
+    ignore_title_keywords:
+      - "WIP"
+      - "DO NOT REVIEW"
+      - "DO NOT MERGE"

  finishing_touches:
    docstrings:
@ -84,7 +97,7 @@ reviews:

  tools:
    ruff:
-      enabled: true
+      enabled: false
    pylint:
      enabled: false
    flake8:
--- a/app/subgraph_manager.py
+++ b/app/subgraph_manager.py
@ -53,7 +53,7 @@ class SubgraphManager:
        return entry_id, entry

    async def load_entry_data(self, entry: SubgraphEntry):
-        with open(entry['path'], 'r') as f:
+        with open(entry['path'], 'r', encoding='utf-8') as f:
            entry['data'] = f.read()
        return entry

--- a/comfy/ldm/lightricks/av_model.py
+++ b/comfy/ldm/lightricks/av_model.py
@ -9,6 +9,7 @@ from comfy.ldm.lightricks.model import (
    LTXVModel,
 )
 from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier
+from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import comfy.ldm.common_dit

 class CompressedTimestep:
@ -450,6 +451,29 @@ class LTXAVModel(LTXVModel):
            operations=self.operations,
        )

+        self.audio_embeddings_connector = Embeddings1DConnector(
+            split_rope=True,
+            double_precision_rope=True,
+            dtype=dtype,
+            device=device,
+            operations=self.operations,
+        )
+
+        self.video_embeddings_connector = Embeddings1DConnector(
+            split_rope=True,
+            double_precision_rope=True,
+            dtype=dtype,
+            device=device,
+            operations=self.operations,
+        )
+
+    def preprocess_text_embeds(self, context):
+        if context.shape[-1] == self.caption_channels * 2:
+            return context
+        out_vid = self.video_embeddings_connector(context)[0]
+        out_audio = self.audio_embeddings_connector(context)[0]
+        return torch.concat((out_vid, out_audio), dim=-1)
+
    def _init_transformer_blocks(self, device, dtype, **kwargs):
        """Initialize transformer blocks for LTXAV."""
        self.transformer_blocks = nn.ModuleList(
--- a/comfy/ldm/lightricks/embeddings_connector.py
+++ b/comfy/ldm/lightricks/embeddings_connector.py
@ -157,11 +157,9 @@ class Embeddings1DConnector(nn.Module):
        self.num_learnable_registers = num_learnable_registers
        if self.num_learnable_registers:
            self.learnable_registers = nn.Parameter(
-                torch.rand(
+                torch.empty(
                    self.num_learnable_registers, inner_dim, dtype=dtype, device=device
                )
-                * 2.0
-                - 1.0
            )

    def get_fractional_positions(self, indices_grid):
@ -234,7 +232,7 @@ class Embeddings1DConnector(nn.Module):

        return indices

-    def precompute_freqs_cis(self, indices_grid, spacing="exp"):
+    def precompute_freqs_cis(self, indices_grid, spacing="exp", out_dtype=None):
        dim = self.inner_dim
        n_elem = 2  # 2 because of cos and sin
        freqs = self.precompute_freqs(indices_grid, spacing)
@ -247,7 +245,7 @@ class Embeddings1DConnector(nn.Module):
            )
        else:
            cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem)
-        return cos_freq.to(self.dtype), sin_freq.to(self.dtype), self.split_rope
+        return cos_freq.to(dtype=out_dtype), sin_freq.to(dtype=out_dtype), self.split_rope

    def forward(
        self,
@ -288,7 +286,7 @@ class Embeddings1DConnector(nn.Module):
            hidden_states.shape[1], dtype=torch.float32, device=hidden_states.device
        )
        indices_grid = indices_grid[None, None, :]
-        freqs_cis = self.precompute_freqs_cis(indices_grid)
+        freqs_cis = self.precompute_freqs_cis(indices_grid, out_dtype=hidden_states.dtype)

        # 2. Blocks
        for block_idx, block in enumerate(self.transformer_1d_blocks):
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@ -78,4 +78,4 @@ def interpret_gathered_like(tensors, gathered):

    return dest_views

-aimdo_allocator = None
+aimdo_enabled = False
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@ -988,10 +988,14 @@ class LTXAV(BaseModel):
    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
+        device = kwargs["device"]
+
        if attention_mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
+            if hasattr(self.diffusion_model, "preprocess_text_embeds"):
+                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()))
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -836,7 +836,7 @@ def unet_inital_load_device(parameters, dtype):

    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
-    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
+    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_enabled:
        return torch_dev
    else:
        return cpu_dev
@ -1121,7 +1121,6 @@ def get_cast_buffer(offload_stream, device, size, ref):
            synchronize()
            del STREAM_CAST_BUFFERS[offload_stream]
            del cast_buffer
-            #FIXME: This doesn't work in Aimdo because mempool cant clear cache
            soft_empty_cache()
        with wf_context:
            cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -3,7 +3,6 @@ import os
 from transformers import T5TokenizerFast
 from .spiece_tokenizer import SPieceTokenizer
 import comfy.text_encoders.genmo
-from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import torch
 import comfy.utils
 import math
@ -109,22 +108,6 @@ class LTXAVTEModel(torch.nn.Module):
        operations = self.gemma3_12b.operations # TODO
        self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device)

-        self.audio_embeddings_connector = Embeddings1DConnector(
-            split_rope=True,
-            double_precision_rope=True,
-            dtype=dtype,
-            device=device,
-            operations=operations,
-        )
-
-        self.video_embeddings_connector = Embeddings1DConnector(
-            split_rope=True,
-            double_precision_rope=True,
-            dtype=dtype,
-            device=device,
-            operations=operations,
-        )
-
    def set_clip_options(self, options):
        self.execution_device = options.get("execution_device", self.execution_device)
        self.gemma3_12b.set_clip_options(options)
@ -146,10 +129,6 @@ class LTXAVTEModel(torch.nn.Module):
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
        out = out.float()
-        out_vid = self.video_embeddings_connector(out)[0]
-        out_audio = self.audio_embeddings_connector(out)[0]
-        out = torch.concat((out_vid, out_audio), dim=-1)
-
        return out.to(out_device), pooled

    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
@ -159,14 +138,14 @@ class LTXAVTEModel(torch.nn.Module):
        if "model.layers.47.self_attn.q_norm.weight" in sd:
            return self.gemma3_12b.load_sd(sd)
        else:
-            sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True)
+            sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight"}, filter_keys=True)
            if len(sdo) == 0:
                sdo = sd

            missing_all = []
            unexpected_all = []

-            for prefix, component in [("text_embedding_projection.", self.text_embedding_projection), ("video_embeddings_connector.", self.video_embeddings_connector), ("audio_embeddings_connector.", self.audio_embeddings_connector)]:
+            for prefix, component in [("text_embedding_projection.", self.text_embedding_projection)]:
                component_sd = {k.replace(prefix, ""): v for k, v in sdo.items() if k.startswith(prefix)}
                if component_sd:
                    missing, unexpected = component.load_state_dict(component_sd, strict=False, assign=getattr(self, "can_assign_sd", False))
--- a/comfy/utils.py
+++ b/comfy/utils.py
@ -1154,7 +1154,7 @@ def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_am
    return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)

 def model_trange(*args, **kwargs):
-    if comfy.memory_management.aimdo_allocator is None:
+    if not comfy.memory_management.aimdo_enabled:
        return trange(*args, **kwargs)

    pbar = trange(*args, **kwargs, smoothing=1.0)
--- a/comfy_extras/nodes_glsl.py
+++ b/comfy_extras/nodes_glsl.py
@ -716,12 +716,12 @@ def _render_shader_batch(
        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
        gl.glUseProgram(0)

-        if input_textures:
-            gl.glDeleteTextures(len(input_textures), input_textures)
-        if output_textures:
-            gl.glDeleteTextures(len(output_textures), output_textures)
-        if ping_pong_textures:
-            gl.glDeleteTextures(len(ping_pong_textures), ping_pong_textures)
+        for tex in input_textures:
+            gl.glDeleteTextures(tex)
+        for tex in output_textures:
+            gl.glDeleteTextures(tex)
+        for tex in ping_pong_textures:
+            gl.glDeleteTextures(tex)
        if fbo is not None:
            gl.glDeleteFramebuffers(1, [fbo])
        for pp_fbo in ping_pong_fbos:
--- a/comfy_extras/nodes_nag.py
+++ b/comfy_extras/nodes_nag.py
@ -10,7 +10,7 @@ class NAGuidance(io.ComfyNode):
            node_id="NAGuidance",
            display_name="Normalized Attention Guidance",
            description="Applies Normalized Attention Guidance to models, enabling negative prompts on distilled/schnell models.",
-            category="",
+            category="advanced/guidance",
            is_experimental=True,
            inputs=[
                io.Model.Input("model", tooltip="The model to apply NAG to."),
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@ -1,10 +1,8 @@
 import os
 import importlib.util
-from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
+from comfy.cli_args import args, PerformanceFeature
 import subprocess

-import comfy_aimdo.control
-
 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
 def get_gpu_names():
    if os.name == 'nt':
@ -87,10 +85,6 @@ if not args.cuda_malloc:
    except:
        pass

-if enables_dynamic_vram() and comfy_aimdo.control.init():
-    args.cuda_malloc = False
-    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = ""
-
 if args.disable_cuda_malloc:
    args.cuda_malloc = False

--- a/execution.py
+++ b/execution.py
@ -9,7 +9,6 @@ import traceback
 from enum import Enum
 from typing import List, Literal, NamedTuple, Optional, Union
 import asyncio
-from contextlib import nullcontext

 import torch

@ -521,19 +520,14 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                # TODO - How to handle this with async functions without contextvars (which requires Python 3.12)?
                GraphBuilder.set_default_prefix(unique_id, call_index, 0)

-            #Do comfy_aimdo mempool chunking here on the per-node level. Multi-model workflows
-            #will cause all sorts of incompatible memory shapes to fragment the pytorch alloc
-            #that we just want to cull out each model run.
-            allocator = comfy.memory_management.aimdo_allocator
-            with nullcontext() if allocator is None else torch.cuda.use_mem_pool(torch.cuda.MemPool(allocator.allocator())):
-                try:
-                    output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
-                finally:
-                    if allocator is not None:
-                        if args.verbose == "DEBUG":
-                            comfy_aimdo.model_vbar.vbars_analyze()
-                        comfy.model_management.reset_cast_buffers()
-                        comfy_aimdo.model_vbar.vbars_reset_watermark_limits()
+            try:
+                output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
+            finally:
+                if comfy.memory_management.aimdo_enabled:
+                    if args.verbose == "DEBUG":
+                        comfy_aimdo.control.analyze()
+                    comfy.model_management.reset_cast_buffers()
+                    comfy_aimdo.model_vbar.vbars_reset_watermark_limits()

            if has_pending_tasks:
                pending_async_nodes[unique_id] = output_data
--- a/main.py
+++ b/main.py
@ -173,6 +173,10 @@ import gc
 if 'torch' in sys.modules:
    logging.warning("WARNING: Potential Error in code: Torch already imported, torch should never be imported before this point.")

+import comfy_aimdo.control
+
+if enables_dynamic_vram():
+    comfy_aimdo.control.init()

 import comfy.utils

@ -188,13 +192,9 @@ import hook_breaker_ac10a0
 import comfy.memory_management
 import comfy.model_patcher

-import comfy_aimdo.control
-import comfy_aimdo.torch
-
 if enables_dynamic_vram():
    if comfy.model_management.torch_version_numeric < (2, 8):
        logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
-        comfy.memory_management.aimdo_allocator = None
    elif comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
        if args.verbose == 'DEBUG':
            comfy_aimdo.control.set_log_debug()
@ -208,11 +208,10 @@ if enables_dynamic_vram():
            comfy_aimdo.control.set_log_info()

        comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
-        comfy.memory_management.aimdo_allocator = comfy_aimdo.torch.get_torch_allocator()
+        comfy.memory_management.aimdo_enabled = True
        logging.info("DynamicVRAM support detected and enabled")
    else:
        logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
-        comfy.memory_management.aimdo_allocator = None


 def cuda_malloc_warning():
--- a/requirements.txt
+++ b/requirements.txt
@ -22,7 +22,7 @@ alembic
 SQLAlchemy
 av>=14.2.0
 comfy-kitchen>=0.2.7
-comfy-aimdo>=0.1.8
+comfy-aimdo>=0.2.0
 requests

 #non essential dependencies: