mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-12-20 19:42:59 +08:00)
restored whitespace and fixed logging
This commit is contained in:
parent 2d550102fc
commit 16adfe2153
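The hunks below span three files, identifiable from their hunk headers: the QwenImageTransformer2DModel implementation, the BaseModel class hierarchy, and the TextEncodeQwenImage* nodes. The change itself is mechanical: the module-level `logger = logging.getLogger(__name__)` objects are removed, the EliGen debug/info/warning calls go back to the module-level `logging.*` functions used elsewhere in the codebase, and the blank-line spacing between top-level definitions is restored. A minimal sketch of the logging pattern being swapped, with illustrative function names that are not part of the diff:

import logging

# Pattern removed by this commit: a named, per-module logger.
logger = logging.getLogger(__name__)

def log_with_named_logger(num_entities: int) -> None:
    # Messages carry the module's name and honor per-module log levels.
    logger.debug("[EliGen Model] Processing %d entities", num_entities)


# Pattern the commit switches back to: module-level logging.* calls,
# which route through the root logger, matching the rest of the codebase.
def log_with_root_logger(num_entities: int) -> None:
    logging.debug("[EliGen Model] Processing %d entities", num_entities)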
@@ -12,8 +12,6 @@ from comfy.ldm.flux.layers import EmbedND
 import comfy.ldm.common_dit
 import comfy.patcher_extension
 
-logger = logging.getLogger(__name__)
-
 
 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
@@ -401,7 +399,7 @@ class QwenImageTransformer2DModel(nn.Module):
         has_negative = cond_or_uncond and 1 in cond_or_uncond
         is_cfg_batched = has_positive and has_negative
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Processing {num_entities} entities for {height}x{width}px, "
             f"batch_size={actual_batch_size}, CFG_batched={is_cfg_batched}"
         )
@@ -457,7 +455,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
         img_rope = self.pe_embedder(img_ids).squeeze(1).squeeze(0)
 
-        logger.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
+        logging.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
 
         # Concatenate text and image RoPE embeddings
         # Convert to latent dtype to match queries/keys
@@ -473,7 +471,7 @@ class QwenImageTransformer2DModel(nn.Module):
         if entity_masks.shape[3] != padded_h or entity_masks.shape[4] != padded_w:
             pad_h = padded_h - entity_masks.shape[3]
             pad_w = padded_w - entity_masks.shape[4]
-            logger.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
+            logging.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
             entity_masks = torch.nn.functional.pad(entity_masks, (0, pad_w, 0, pad_h), mode='constant', value=0)
 
         entity_masks = [entity_masks[:, i, None].squeeze(1) for i in range(max_masks)]
@@ -488,7 +486,7 @@ class QwenImageTransformer2DModel(nn.Module):
         seq_lens = entity_seq_lens + [global_seq_len]
         total_seq_len = int(sum(seq_lens) + image.shape[1])
 
-        logger.debug(f"[EliGen Model] total_seq={total_seq_len}")
+        logging.debug(f"[EliGen Model] total_seq={total_seq_len}")
 
         patched_masks = []
         for i in range(N):
@@ -557,7 +555,7 @@ class QwenImageTransformer2DModel(nn.Module):
             # CFG batch: [positive, negative] - need different masks for each
             # Positive gets entity constraints, negative gets standard attention (all zeros)
 
-            logger.debug(
+            logging.debug(
                 "[EliGen Model] CFG batched detected - creating separate masks. "
                 "Positive (index 0) gets entity mask, Negative (index 1) gets standard mask"
             )
@@ -576,7 +574,7 @@ class QwenImageTransformer2DModel(nn.Module):
             # Concatenate masks to match batch
             attention_mask = torch.cat(mask_list, dim=0)
 
-            logger.debug(
+            logging.debug(
                 f"[EliGen Model] Created {len(mask_list)} masks for CFG batch. "
                 f"Final shape: {attention_mask.shape}"
             )
@@ -584,7 +582,7 @@ class QwenImageTransformer2DModel(nn.Module):
         # Add head dimension: [B, 1, seq, seq]
         attention_mask = attention_mask.unsqueeze(1)
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Attention mask created: shape={attention_mask.shape}, "
             f"valid_connections={num_valid_connections}/{total_seq_len * total_seq_len}"
         )
@@ -654,7 +652,7 @@ class QwenImageTransformer2DModel(nn.Module):
         batch_size = x.shape[0]
 
         if entity_prompt_emb is not None:
-            logger.debug(
+            logging.debug(
                 f"[EliGen Forward] batch_size={batch_size}, cond_or_uncond={cond_or_uncond}, "
                 f"has_positive={is_positive_cond}, has_negative={is_negative_cond}"
             )

@@ -119,6 +119,7 @@ def convert_tensor(extra, dtype, device):
             extra = comfy.model_management.cast_to_device(extra, device, None)
     return extra
 
+
 class BaseModel(torch.nn.Module):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_model=UNetModel):
         super().__init__()
@@ -380,6 +381,7 @@ class BaseModel(torch.nn.Module):
     def extra_conds_shapes(self, **kwargs):
         return {}
 
+
 def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None):
     adm_inputs = []
     weights = []
@@ -475,6 +477,7 @@ class SDXL(BaseModel):
         flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
         return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
 
+
 class SVD_img2vid(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -551,6 +554,7 @@ class SV3D_p(SVD_img2vid):
         out = list(map(lambda a: utils.resize_to_batch_size(a, noise.shape[0]), out))
         return torch.cat(out, dim=1)
 
+
 class Stable_Zero123(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
         super().__init__(model_config, model_type, device=device)
@@ -634,11 +638,13 @@ class IP2P:
         image = utils.resize_to_batch_size(image, noise.shape[0])
         return self.process_ip2p_image_in(image)
 
+
 class SD15_instructpix2pix(IP2P, BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
         self.process_ip2p_image_in = lambda image: image
 
+
 class SDXL_instructpix2pix(IP2P, SDXL):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -688,6 +694,7 @@ class StableCascade_C(BaseModel):
         out['clip_text'] = comfy.conds.CONDCrossAttn(cross_attn)
         return out
 
+
 class StableCascade_B(BaseModel):
     def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=StageB)
@@ -707,6 +714,7 @@ class StableCascade_B(BaseModel):
         out["sca"] = comfy.conds.CONDRegular(torch.zeros((1,)))
         return out
 
+
 class SD3(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=OpenAISignatureMMDITWrapper)
@@ -721,6 +729,7 @@ class SD3(BaseModel):
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class AuraFlow(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.aura.mmdit.MMDiT)
@@ -732,6 +741,7 @@ class AuraFlow(BaseModel):
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class StableAudio1(BaseModel):
     def __init__(self, model_config, seconds_start_embedder_weights, seconds_total_embedder_weights, model_type=ModelType.V_PREDICTION_CONTINUOUS, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.audio.dit.AudioDiffusionTransformer)
@@ -770,6 +780,7 @@ class StableAudio1(BaseModel):
                 sd["{}{}".format(k, l)] = s[l]
         return sd
 
+
 class HunyuanDiT(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hydit.models.HunYuanDiT)
@@ -903,6 +914,7 @@ class Flux(BaseModel):
             out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
         return out
 
+
 class GenmoMochi(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
@@ -1154,6 +1166,7 @@ class WAN21(BaseModel):
 
         return out
 
+
 class WAN21_Vace(WAN21):
     def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
         super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel)

@@ -8,8 +8,6 @@ from typing import Optional
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
 
-logger = logging.getLogger(__name__)
-
 
 class TextEncodeQwenImageEdit(io.ComfyNode):
     @classmethod
@@ -186,8 +184,8 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         width = latent_width * 8
 
         if pad_h > 0 or pad_w > 0:
-            logger.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
-            logger.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
+            logging.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
+            logging.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
 
         # Collect entity prompts and masks
         entity_prompts = [entity_prompt_1, entity_prompt_2, entity_prompt_3]
@@ -202,7 +200,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         # Log warning if some entities were skipped
         total_prompts_provided = len([p for p in entity_prompts if p.strip()])
         if len(valid_entities) < total_prompts_provided:
-            logger.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
+            logging.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
 
         # If no valid entities, return standard conditioning
         if len(valid_entities) == 0:
@@ -244,7 +242,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             )
 
             # Log original mask statistics
-            logger.debug(
+            logging.debug(
                 f"[EliGen] Entity {i+1} input mask: shape={mask_tensor.shape}, "
                 f"dtype={mask_tensor.dtype}, min={mask_tensor.min():.4f}, max={mask_tensor.max():.4f}"
             )
@@ -260,7 +258,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
 
             # Check for constant masks (no variation)
             if mask_tensor.min() == mask_tensor.max() and mask_tensor.max() > 0:
-                logger.warning(
+                logging.warning(
                     f"[EliGen] Entity {i+1} mask has no variation (all pixels = {mask_tensor.min():.4f}). "
                     f"This entity will affect the entire image."
                 )
@@ -284,12 +282,12 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             # Log size mismatch if mask doesn't match expected latent dimensions
            expected_h, expected_w = latent_height * 8, latent_width * 8
            if orig_h != expected_h or orig_w != expected_w:
-                logger.info(
+                logging.info(
                    f"[EliGen] Entity {i+1} mask size mismatch: {orig_h}x{orig_w} vs expected {expected_h}x{expected_w}. "
                    f"Will resize to {latent_height}x{latent_width} latent space."
                )
            else:
-                logger.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
+                logging.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
 
            # Convert MASK format [batch, height, width] to [batch, 1, height, width] for common_upscale
            # common_upscale expects [batch, channels, height, width]
@@ -319,7 +317,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
                    f"Original mask may have been too small or all black."
                )
 
-            logger.debug(
+            logging.debug(
                f"[EliGen] Entity {i+1} mask coverage: {active_pixels}/{total_pixels} pixels ({coverage_pct:.1f}%)"
            )
 
@@ -332,7 +330,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
        entity_masks_tensor = torch.stack(processed_entity_masks_no_batch, dim=0)  # [num_entities, 1, H, W]
        entity_masks_tensor = entity_masks_tensor.unsqueeze(0)  # [1, num_entities, 1, H, W]
 
-        logger.debug(
+        logging.debug(
            f"[EliGen] Stacked {len(valid_entities)} entity masks into tensor: "
            f"shape={entity_masks_tensor.shape} (expected: [1, {len(valid_entities)}, 1, {latent_height}, {latent_width}])"
        )
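For context on the EliGen mask handling that the hunks above log about, here is a self-contained sketch of the two steps they reference: resizing a ComfyUI MASK tensor to the latent grid and zero-padding it when the latent itself was padded. The function name, the use of torch.nn.functional.interpolate in place of the node's common_upscale call, and the argument layout are assumptions for illustration, not the node's actual code.

import torch
import torch.nn.functional as F

def prepare_entity_mask(mask: torch.Tensor, latent_h: int, latent_w: int,
                        padded_h: int, padded_w: int) -> torch.Tensor:
    # MASK tensors arrive as [batch, H, W]; add a channel axis so the resize
    # sees [batch, 1, H, W] (the hunk's comment notes common_upscale expects
    # [batch, channels, height, width]).
    mask = mask.unsqueeze(1)
    mask = F.interpolate(mask, size=(latent_h, latent_w), mode="bilinear", align_corners=False)

    # If the latent was padded, pad the mask with zeros on the right/bottom,
    # mirroring torch.nn.functional.pad(entity_masks, (0, pad_w, 0, pad_h), value=0)
    # in the model-side hunk.
    pad_h, pad_w = padded_h - latent_h, padded_w - latent_w
    if pad_h > 0 or pad_w > 0:
        mask = F.pad(mask, (0, pad_w, 0, pad_h), mode="constant", value=0)
    return mask

# Example: a 512x512 mask prepared for a 64x64 latent that was padded to 64x66.
mask = torch.rand(1, 512, 512)
print(prepare_entity_mask(mask, 64, 64, 64, 66).shape)  # torch.Size([1, 1, 64, 66])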