Compare commits


No commits in common. "9304e47351be8d178a093b30bcaf5d72c3a2baf5" and "d02d0e5744f2e06fc40834d3c5bb387de4532007" have entirely different histories.

12 changed files with 29 additions and 109 deletions

View File

@@ -5,7 +5,6 @@ on:
   push:
     branches:
       - master
-      - release/**
     paths-ignore:
       - 'app/**'
       - 'input/**'

View File

@@ -2,9 +2,9 @@ name: Execution Tests
 on:
   push:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
   pull_request:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
 jobs:
   test:

View File

@@ -2,9 +2,9 @@ name: Test server launches without errors
 on:
   push:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
   pull_request:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
 jobs:
   test:

View File

@@ -2,9 +2,9 @@ name: Unit Tests
 on:
   push:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
   pull_request:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
 jobs:
   test:

View File

@@ -6,7 +6,6 @@ on:
       - "pyproject.toml"
     branches:
       - master
-      - release/**
 jobs:
   update-version:

View File

@@ -634,11 +634,8 @@ class NextDiT(nn.Module):
         img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, transformer_options=transformer_options)
         freqs_cis = freqs_cis.to(img.device)
-        transformer_options["total_blocks"] = len(self.layers)
-        transformer_options["block_type"] = "double"
         img_input = img
         for i, layer in enumerate(self.layers):
-            transformer_options["block_index"] = i
             img = layer(img, mask, freqs_cis, adaln_input, transformer_options=transformer_options)
             if "double_block" in patches:
                 for p in patches["double_block"]:

View File

@@ -322,7 +322,6 @@ class QwenImageTransformer2DModel(nn.Module):
         pooled_projection_dim: int = 768,
         guidance_embeds: bool = False,
         axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
-        default_ref_method="index",
         image_model=None,
         final_layer=True,
         dtype=None,
@@ -335,7 +334,6 @@
         self.in_channels = in_channels
         self.out_channels = out_channels or in_channels
         self.inner_dim = num_attention_heads * attention_head_dim
-        self.default_ref_method = default_ref_method
         self.pe_embedder = EmbedND(dim=attention_head_dim, theta=10000, axes_dim=list(axes_dims_rope))
@@ -418,7 +416,7 @@
             h = 0
             w = 0
             index = 0
-            ref_method = kwargs.get("ref_latents_method", self.default_ref_method)
+            ref_method = kwargs.get("ref_latents_method", "index")
             index_ref_method = (ref_method == "index") or (ref_method == "index_timestep_zero")
             timestep_zero = ref_method == "index_timestep_zero"
             for ref in ref_latents:

View File

@@ -568,10 +568,7 @@ class WanModel(torch.nn.Module):
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -766,10 +763,7 @@ class VaceWanModel(WanModel):
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -868,10 +862,7 @@ class CameraWanModel(WanModel):
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -1335,19 +1326,16 @@ class WanModel_S2V(WanModel):
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
-                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"], transformer_options=args["transformer_options"])
+                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"])
                     return out
-                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs, "transformer_options": transformer_options}, {"original_block": block_wrap})
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs}, {"original_block": block_wrap})
                 x = out["img"]
             else:
-                x = block(x, e=e0, freqs=freqs, context=context, transformer_options=transformer_options)
+                x = block(x, e=e0, freqs=freqs, context=context)
             if audio_emb is not None:
                 x = self.audio_injector(x, i, audio_emb, audio_emb_global, seq_len)
         # head
@@ -1586,10 +1574,7 @@ class HumoWanModel(WanModel):
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}

View File

@@ -523,10 +523,7 @@ class AnimateWanModel(WanModel):
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}

View File

@@ -28,7 +28,6 @@ from . import supported_models_base
 from . import latent_formats
 from . import diffusers_convert
-import comfy.model_management
 class SD15(supported_models_base.BASE):
     unet_config = {
@@ -1029,13 +1028,7 @@ class ZImage(Lumina2):
     memory_usage_factor = 2.0
-    supported_inference_dtypes = [torch.bfloat16, torch.float32]
-    def __init__(self, unet_config):
-        super().__init__(unet_config)
-        if comfy.model_management.extended_fp16_support():
-            self.supported_inference_dtypes = self.supported_inference_dtypes.copy()
-            self.supported_inference_dtypes.insert(1, torch.float16)
+    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
     def clip_target(self, state_dict={}):
         pref = self.text_encoder_key_prefix[0]

View File

@@ -248,10 +248,7 @@ class ModelPatchLoader:
             config['n_control_layers'] = 15
             config['additional_in_dim'] = 17
             config['refiner_control'] = True
-            ref_weight = sd.get("control_noise_refiner.0.after_proj.weight", None)
-            if ref_weight is not None:
-                if torch.count_nonzero(ref_weight) == 0:
-                    config['broken'] = True
+            config['broken'] = True
         model = comfy.ldm.lumina.controlnet.ZImage_Control(device=comfy.model_management.unet_offload_device(), dtype=dtype, operations=comfy.ops.manual_cast, **config)
         model.load_state_dict(sd)
@@ -313,46 +310,22 @@ class ZImageControlPatch:
         self.inpaint_image = inpaint_image
         self.mask = mask
         self.strength = strength
-        self.is_inpaint = self.model_patch.model.additional_in_dim > 0
-        skip_encoding = False
-        if self.image is not None and self.inpaint_image is not None:
-            if self.image.shape != self.inpaint_image.shape:
-                skip_encoding = True
-        if skip_encoding:
-            self.encoded_image = None
-        else:
-            self.encoded_image = self.encode_latent_cond(self.image, self.inpaint_image)
-        if self.image is None:
-            self.encoded_image_size = (self.inpaint_image.shape[1], self.inpaint_image.shape[2])
-        else:
-            self.encoded_image_size = (self.image.shape[1], self.image.shape[2])
+        self.encoded_image = self.encode_latent_cond(image)
+        self.encoded_image_size = (image.shape[1], image.shape[2])
         self.temp_data = None
-    def encode_latent_cond(self, control_image=None, inpaint_image=None):
-        latent_image = None
-        if control_image is not None:
-            latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))
-        if self.is_inpaint:
+    def encode_latent_cond(self, control_image, inpaint_image=None):
+        latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))
+        if self.model_patch.model.additional_in_dim > 0:
+            if self.mask is None:
+                mask_ = torch.zeros_like(latent_image)[:, :1]
+            else:
+                mask_ = comfy.utils.common_upscale(self.mask.mean(dim=1, keepdim=True), latent_image.shape[-1], latent_image.shape[-2], "bilinear", "none")
             if inpaint_image is None:
                 inpaint_image = torch.ones_like(control_image) * 0.5
-            if self.mask is not None:
-                mask_inpaint = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image.shape[-2], inpaint_image.shape[-3], "bilinear", "center")
-                inpaint_image = ((inpaint_image - 0.5) * mask_inpaint.movedim(1, -1).round()) + 0.5
             inpaint_image_latent = comfy.latent_formats.Flux().process_in(self.vae.encode(inpaint_image))
-            if self.mask is None:
-                mask_ = torch.zeros_like(inpaint_image_latent)[:, :1]
-            else:
-                mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image_latent.shape[-1], inpaint_image_latent.shape[-2], "nearest", "center")
-            if latent_image is None:
-                latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(torch.ones_like(inpaint_image) * 0.5))
             return torch.cat([latent_image, mask_, inpaint_image_latent], dim=1)
         else:
             return latent_image
@@ -368,18 +341,13 @@ class ZImageControlPatch:
         block_type = kwargs.get("block_type", "")
         spacial_compression = self.vae.spacial_compression_encode()
         if self.encoded_image is None or self.encoded_image_size != (x.shape[-2] * spacial_compression, x.shape[-1] * spacial_compression):
-            image_scaled = None
-            if self.image is not None:
-                image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
-                self.encoded_image_size = (image_scaled.shape[-3], image_scaled.shape[-2])
+            image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
             inpaint_scaled = None
             if self.inpaint_image is not None:
                 inpaint_scaled = comfy.utils.common_upscale(self.inpaint_image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
-                self.encoded_image_size = (inpaint_scaled.shape[-3], inpaint_scaled.shape[-2])
             loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
-            self.encoded_image = self.encode_latent_cond(image_scaled, inpaint_scaled)
+            self.encoded_image = self.encode_latent_cond(image_scaled.movedim(1, -1), inpaint_scaled)
+            self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1])
             comfy.model_management.load_models_gpu(loaded_models)
         cnet_blocks = self.model_patch.model.n_control_layers
@@ -420,8 +388,7 @@
     def to(self, device_or_dtype):
         if isinstance(device_or_dtype, torch.device):
-            if self.encoded_image is not None:
-                self.encoded_image = self.encoded_image.to(device_or_dtype)
+            self.encoded_image = self.encoded_image.to(device_or_dtype)
             self.temp_data = None
         return self
@@ -444,12 +411,9 @@ class QwenImageDiffsynthControlnet:
     CATEGORY = "advanced/loaders/qwen"
-    def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None):
+    def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=None):
         model_patched = model.clone()
-        if image is not None:
-            image = image[:, :, :, :3]
-        if inpaint_image is not None:
-            inpaint_image = inpaint_image[:, :, :, :3]
+        image = image[:, :, :, :3]
         if mask is not None:
             if mask.ndim == 3:
                 mask = mask.unsqueeze(1)
@@ -458,24 +422,13 @@
             mask = 1.0 - mask
         if isinstance(model_patch.model, comfy.ldm.lumina.controlnet.ZImage_Control):
-            patch = ZImageControlPatch(model_patch, vae, image, strength, inpaint_image=inpaint_image, mask=mask)
+            patch = ZImageControlPatch(model_patch, vae, image, strength, mask=mask)
             model_patched.set_model_noise_refiner_patch(patch)
             model_patched.set_model_double_block_patch(patch)
         else:
             model_patched.set_model_double_block_patch(DiffSynthCnetPatch(model_patch, vae, image, strength, mask))
         return (model_patched,)
-class ZImageFunControlnet(QwenImageDiffsynthControlnet):
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "model": ("MODEL",),
-                              "model_patch": ("MODEL_PATCH",),
-                              "vae": ("VAE",),
-                              "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
-                              },
-                "optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}}
-    CATEGORY = "advanced/loaders/zimage"
 class UsoStyleProjectorPatch:
     def __init__(self, model_patch, encoded_image):
@@ -523,6 +476,5 @@
 NODE_CLASS_MAPPINGS = {
     "ModelPatchLoader": ModelPatchLoader,
     "QwenImageDiffsynthControlnet": QwenImageDiffsynthControlnet,
-    "ZImageFunControlnet": ZImageFunControlnet,
     "USOStyleReference": USOStyleReference,
 }

View File

@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.34.9
+comfyui-frontend-package==1.34.8
 comfyui-workflow-templates==0.7.59
 comfyui-embedded-docs==0.3.1
 torch