From 16adfe2153e03b437e39bb81ab3909d25ffdde6c Mon Sep 17 00:00:00 2001
From: nolan4
Date: Tue, 4 Nov 2025 15:46:52 -0800
Subject: [PATCH] restored whitespace and fixed logging

---
 comfy/ldm/qwen_image/model.py | 18 ++++++++----------
 comfy/model_base.py           | 13 +++++++++++++
 comfy_extras/nodes_qwen.py    | 20 +++++++++-----------
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py
index ffa4743dd..461dde58f 100644
--- a/comfy/ldm/qwen_image/model.py
+++ b/comfy/ldm/qwen_image/model.py
@@ -12,8 +12,6 @@ from comfy.ldm.flux.layers import EmbedND
 import comfy.ldm.common_dit
 import comfy.patcher_extension
 
-logger = logging.getLogger(__name__)
-
 
 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
@@ -401,7 +399,7 @@ class QwenImageTransformer2DModel(nn.Module):
         has_negative = cond_or_uncond and 1 in cond_or_uncond
         is_cfg_batched = has_positive and has_negative
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Processing {num_entities} entities for {height}x{width}px, "
             f"batch_size={actual_batch_size}, CFG_batched={is_cfg_batched}"
         )
@@ -457,7 +455,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
         img_rope = self.pe_embedder(img_ids).squeeze(1).squeeze(0)
 
-        logger.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
+        logging.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
 
         # Concatenate text and image RoPE embeddings
         # Convert to latent dtype to match queries/keys
@@ -473,7 +471,7 @@ class QwenImageTransformer2DModel(nn.Module):
         if entity_masks.shape[3] != padded_h or entity_masks.shape[4] != padded_w:
             pad_h = padded_h - entity_masks.shape[3]
             pad_w = padded_w - entity_masks.shape[4]
-            logger.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
+            logging.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
             entity_masks = torch.nn.functional.pad(entity_masks, (0, pad_w, 0, pad_h), mode='constant', value=0)
 
         entity_masks = [entity_masks[:, i, None].squeeze(1) for i in range(max_masks)]
@@ -488,7 +486,7 @@ class QwenImageTransformer2DModel(nn.Module):
         seq_lens = entity_seq_lens + [global_seq_len]
         total_seq_len = int(sum(seq_lens) + image.shape[1])
 
-        logger.debug(f"[EliGen Model] total_seq={total_seq_len}")
+        logging.debug(f"[EliGen Model] total_seq={total_seq_len}")
 
         patched_masks = []
         for i in range(N):
@@ -557,7 +555,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
             # CFG batch: [positive, negative] - need different masks for each
             # Positive gets entity constraints, negative gets standard attention (all zeros)
-            logger.debug(
+            logging.debug(
                 "[EliGen Model] CFG batched detected - creating separate masks. "
                 "Positive (index 0) gets entity mask, Negative (index 1) gets standard mask"
             )
@@ -576,7 +574,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
             # Concatenate masks to match batch
             attention_mask = torch.cat(mask_list, dim=0)
-            logger.debug(
+            logging.debug(
                 f"[EliGen Model] Created {len(mask_list)} masks for CFG batch. "
                 f"Final shape: {attention_mask.shape}"
             )
@@ -584,7 +582,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
         # Add head dimension: [B, 1, seq, seq]
         attention_mask = attention_mask.unsqueeze(1)
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Attention mask created: shape={attention_mask.shape}, "
             f"valid_connections={num_valid_connections}/{total_seq_len * total_seq_len}"
         )
@@ -654,7 +652,7 @@ class QwenImageTransformer2DModel(nn.Module):
         batch_size = x.shape[0]
 
         if entity_prompt_emb is not None:
-            logger.debug(
+            logging.debug(
                 f"[EliGen Forward] batch_size={batch_size}, cond_or_uncond={cond_or_uncond}, "
                 f"has_positive={is_positive_cond}, has_negative={is_negative_cond}"
             )
diff --git a/comfy/model_base.py b/comfy/model_base.py
index 9a4010843..ab30a6f97 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -119,6 +119,7 @@ def convert_tensor(extra, dtype, device):
         extra = comfy.model_management.cast_to_device(extra, device, None)
     return extra
 
+
 class BaseModel(torch.nn.Module):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_model=UNetModel):
         super().__init__()
@@ -380,6 +381,7 @@ class BaseModel(torch.nn.Module):
     def extra_conds_shapes(self, **kwargs):
         return {}
 
+
 def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None):
     adm_inputs = []
     weights = []
@@ -475,6 +477,7 @@ class SDXL(BaseModel):
         flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
         return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
 
+
 class SVD_img2vid(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -551,6 +554,7 @@ class SV3D_p(SVD_img2vid):
         out = list(map(lambda a: utils.resize_to_batch_size(a, noise.shape[0]), out))
         return torch.cat(out, dim=1)
 
+
 class Stable_Zero123(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
         super().__init__(model_config, model_type, device=device)
@@ -634,11 +638,13 @@ class IP2P:
             image = utils.resize_to_batch_size(image, noise.shape[0])
         return self.process_ip2p_image_in(image)
 
+
 class SD15_instructpix2pix(IP2P, BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
         self.process_ip2p_image_in = lambda image: image
 
+
 class SDXL_instructpix2pix(IP2P, SDXL):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -688,6 +694,7 @@ class StableCascade_C(BaseModel):
             out['clip_text'] = comfy.conds.CONDCrossAttn(cross_attn)
         return out
 
+
 class StableCascade_B(BaseModel):
     def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=StageB)
@@ -707,6 +714,7 @@ class StableCascade_B(BaseModel):
         out["sca"] = comfy.conds.CONDRegular(torch.zeros((1,)))
         return out
 
+
 class SD3(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=OpenAISignatureMMDITWrapper)
@@ -721,6 +729,7 @@ class SD3(BaseModel):
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class AuraFlow(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.aura.mmdit.MMDiT)
@@ -732,6 +741,7 @@ class AuraFlow(BaseModel):
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class StableAudio1(BaseModel):
     def __init__(self, model_config, seconds_start_embedder_weights, seconds_total_embedder_weights, model_type=ModelType.V_PREDICTION_CONTINUOUS, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.audio.dit.AudioDiffusionTransformer)
@@ -770,6 +780,7 @@ class StableAudio1(BaseModel):
                 sd["{}{}".format(k, l)] = s[l]
         return sd
 
+
 class HunyuanDiT(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hydit.models.HunYuanDiT)
@@ -903,6 +914,7 @@ class Flux(BaseModel):
             out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
         return out
 
+
 class GenmoMochi(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
@@ -1154,6 +1166,7 @@ class WAN21(BaseModel):
 
         return out
 
+
 class WAN21_Vace(WAN21):
     def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
         super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel)
diff --git a/comfy_extras/nodes_qwen.py b/comfy_extras/nodes_qwen.py
index 03ca00f73..8671d60ae 100644
--- a/comfy_extras/nodes_qwen.py
+++ b/comfy_extras/nodes_qwen.py
@@ -8,8 +8,6 @@ from typing import Optional
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
 
-logger = logging.getLogger(__name__)
-
 
 class TextEncodeQwenImageEdit(io.ComfyNode):
     @classmethod
@@ -186,8 +184,8 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         width = latent_width * 8
 
         if pad_h > 0 or pad_w > 0:
-            logger.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
-            logger.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
+            logging.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
+            logging.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
 
         # Collect entity prompts and masks
         entity_prompts = [entity_prompt_1, entity_prompt_2, entity_prompt_3]
@@ -202,7 +200,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         # Log warning if some entities were skipped
         total_prompts_provided = len([p for p in entity_prompts if p.strip()])
         if len(valid_entities) < total_prompts_provided:
-            logger.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
+            logging.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
 
         # If no valid entities, return standard conditioning
         if len(valid_entities) == 0:
@@ -244,7 +242,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             )
 
             # Log original mask statistics
-            logger.debug(
+            logging.debug(
                 f"[EliGen] Entity {i+1} input mask: shape={mask_tensor.shape}, "
                 f"dtype={mask_tensor.dtype}, min={mask_tensor.min():.4f}, max={mask_tensor.max():.4f}"
             )
@@ -260,7 +258,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
 
             # Check for constant masks (no variation)
             if mask_tensor.min() == mask_tensor.max() and mask_tensor.max() > 0:
-                logger.warning(
+                logging.warning(
                     f"[EliGen] Entity {i+1} mask has no variation (all pixels = {mask_tensor.min():.4f}). "
                     f"This entity will affect the entire image."
                 )
@@ -284,12 +282,12 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             # Log size mismatch if mask doesn't match expected latent dimensions
             expected_h, expected_w = latent_height * 8, latent_width * 8
             if orig_h != expected_h or orig_w != expected_w:
-                logger.info(
+                logging.info(
                     f"[EliGen] Entity {i+1} mask size mismatch: {orig_h}x{orig_w} vs expected {expected_h}x{expected_w}. "
                     f"Will resize to {latent_height}x{latent_width} latent space."
                 )
             else:
-                logger.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
+                logging.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
 
             # Convert MASK format [batch, height, width] to [batch, 1, height, width] for common_upscale
             # common_upscale expects [batch, channels, height, width]
@@ -319,7 +317,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
                     f"Original mask may have been too small or all black."
                 )
 
-            logger.debug(
+            logging.debug(
                 f"[EliGen] Entity {i+1} mask coverage: {active_pixels}/{total_pixels} pixels ({coverage_pct:.1f}%)"
             )
@@ -332,7 +330,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         entity_masks_tensor = torch.stack(processed_entity_masks_no_batch, dim=0)  # [num_entities, 1, H, W]
         entity_masks_tensor = entity_masks_tensor.unsqueeze(0)  # [1, num_entities, 1, H, W]
 
-        logger.debug(
+        logging.debug(
             f"[EliGen] Stacked {len(valid_entities)} entity masks into tensor: "
             f"shape={entity_masks_tensor.shape} (expected: [1, {len(valid_entities)}, 1, {latent_height}, {latent_width}])"
         )
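
Note on the logging change (commentary, not part of the patch): the patch removes
the per-module `logger = logging.getLogger(__name__)` objects and switches every
call site to the module-level `logging.*` functions, which is the pattern the rest
of this codebase already uses. A minimal sketch contrasting the two patterns,
with illustrative messages only:

    import logging

    # Pattern removed by this patch: a named, per-module logger.
    # Records carry the module's __name__, so levels and handlers can be
    # tuned per module via the logging hierarchy, but output depends on
    # handlers the application attaches.
    logger = logging.getLogger(__name__)
    logger.debug("[EliGen] message via named logger")

    # Pattern adopted by this patch: module-level convenience functions.
    # These delegate to the root logger, so messages follow whatever
    # configuration the application installs on it.
    logging.basicConfig(level=logging.DEBUG)
    logging.debug("[EliGen] message via root logger")

One behavioral difference worth knowing: the module-level functions call
logging.basicConfig() automatically if the root logger has no handlers, whereas
an unconfigured named logger falls back to Python's "last resort" handler, which
only emits WARNING and above, so per-module debug output can be silently dropped.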