mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-12-20 19:42:59 +08:00)
restored whitespace and fixed logging
This commit is contained in:
parent 2d550102fc
commit 16adfe2153
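The hunks below span three files, identifiable from their hunk headers: the QwenImageTransformer2DModel implementation, the BaseModel class hierarchy, and the TextEncodeQwenImage* nodes. The change itself is mechanical: the module-level `logger = logging.getLogger(__name__)` objects are removed, the EliGen debug/info/warning calls go back to the module-level `logging.*` functions used elsewhere in the codebase, and the blank-line spacing between top-level definitions is restored. A minimal sketch of the logging pattern being swapped, with illustrative function names that are not part of the diff:

import logging

# Pattern removed by this commit: a named, per-module logger.
logger = logging.getLogger(__name__)

def log_with_named_logger(num_entities: int) -> None:
    # Messages carry the module's name and honor per-module log levels.
    logger.debug("[EliGen Model] Processing %d entities", num_entities)


# Pattern the commit switches back to: module-level logging.* calls,
# which route through the root logger, matching the rest of the codebase.
def log_with_root_logger(num_entities: int) -> None:
    logging.debug("[EliGen Model] Processing %d entities", num_entities)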
@@ -12,8 +12,6 @@ from comfy.ldm.flux.layers import EmbedND
 import comfy.ldm.common_dit
 import comfy.patcher_extension
 
-logger = logging.getLogger(__name__)
-
 
 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
@@ -401,7 +399,7 @@ class QwenImageTransformer2DModel(nn.Module):
         has_negative = cond_or_uncond and 1 in cond_or_uncond
         is_cfg_batched = has_positive and has_negative
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Processing {num_entities} entities for {height}x{width}px, "
             f"batch_size={actual_batch_size}, CFG_batched={is_cfg_batched}"
         )
@@ -457,7 +455,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
         img_rope = self.pe_embedder(img_ids).squeeze(1).squeeze(0)
 
-        logger.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
+        logging.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
 
         # Concatenate text and image RoPE embeddings
         # Convert to latent dtype to match queries/keys
@@ -473,7 +471,7 @@ class QwenImageTransformer2DModel(nn.Module):
         if entity_masks.shape[3] != padded_h or entity_masks.shape[4] != padded_w:
             pad_h = padded_h - entity_masks.shape[3]
             pad_w = padded_w - entity_masks.shape[4]
-            logger.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
+            logging.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
             entity_masks = torch.nn.functional.pad(entity_masks, (0, pad_w, 0, pad_h), mode='constant', value=0)
 
         entity_masks = [entity_masks[:, i, None].squeeze(1) for i in range(max_masks)]
@@ -488,7 +486,7 @@ class QwenImageTransformer2DModel(nn.Module):
         seq_lens = entity_seq_lens + [global_seq_len]
         total_seq_len = int(sum(seq_lens) + image.shape[1])
 
-        logger.debug(f"[EliGen Model] total_seq={total_seq_len}")
+        logging.debug(f"[EliGen Model] total_seq={total_seq_len}")
 
         patched_masks = []
         for i in range(N):
@@ -557,7 +555,7 @@ class QwenImageTransformer2DModel(nn.Module):
             # CFG batch: [positive, negative] - need different masks for each
             # Positive gets entity constraints, negative gets standard attention (all zeros)
 
-            logger.debug(
+            logging.debug(
                 "[EliGen Model] CFG batched detected - creating separate masks. "
                 "Positive (index 0) gets entity mask, Negative (index 1) gets standard mask"
             )
@@ -576,7 +574,7 @@ class QwenImageTransformer2DModel(nn.Module):
             # Concatenate masks to match batch
             attention_mask = torch.cat(mask_list, dim=0)
 
-            logger.debug(
+            logging.debug(
                 f"[EliGen Model] Created {len(mask_list)} masks for CFG batch. "
                 f"Final shape: {attention_mask.shape}"
             )
@@ -584,7 +582,7 @@ class QwenImageTransformer2DModel(nn.Module):
         # Add head dimension: [B, 1, seq, seq]
         attention_mask = attention_mask.unsqueeze(1)
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Attention mask created: shape={attention_mask.shape}, "
             f"valid_connections={num_valid_connections}/{total_seq_len * total_seq_len}"
         )
@@ -654,7 +652,7 @@ class QwenImageTransformer2DModel(nn.Module):
         batch_size = x.shape[0]
 
         if entity_prompt_emb is not None:
-            logger.debug(
+            logging.debug(
                 f"[EliGen Forward] batch_size={batch_size}, cond_or_uncond={cond_or_uncond}, "
                 f"has_positive={is_positive_cond}, has_negative={is_negative_cond}"
             )

@@ -119,6 +119,7 @@ def convert_tensor(extra, dtype, device):
             extra = comfy.model_management.cast_to_device(extra, device, None)
     return extra
 
+
 class BaseModel(torch.nn.Module):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_model=UNetModel):
         super().__init__()
@@ -380,6 +381,7 @@ class BaseModel(torch.nn.Module):
     def extra_conds_shapes(self, **kwargs):
         return {}
 
+
 def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None):
     adm_inputs = []
     weights = []
@@ -475,6 +477,7 @@ class SDXL(BaseModel):
         flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
         return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
 
+
 class SVD_img2vid(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -551,6 +554,7 @@ class SV3D_p(SVD_img2vid):
         out = list(map(lambda a: utils.resize_to_batch_size(a, noise.shape[0]), out))
         return torch.cat(out, dim=1)
 
+
 class Stable_Zero123(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
         super().__init__(model_config, model_type, device=device)
@@ -634,11 +638,13 @@ class IP2P:
         image = utils.resize_to_batch_size(image, noise.shape[0])
         return self.process_ip2p_image_in(image)
 
+
 class SD15_instructpix2pix(IP2P, BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
         self.process_ip2p_image_in = lambda image: image
 
+
 class SDXL_instructpix2pix(IP2P, SDXL):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -688,6 +694,7 @@ class StableCascade_C(BaseModel):
         out['clip_text'] = comfy.conds.CONDCrossAttn(cross_attn)
         return out
 
+
 class StableCascade_B(BaseModel):
     def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=StageB)
@@ -707,6 +714,7 @@ class StableCascade_B(BaseModel):
         out["sca"] = comfy.conds.CONDRegular(torch.zeros((1,)))
         return out
 
+
 class SD3(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=OpenAISignatureMMDITWrapper)
@@ -721,6 +729,7 @@ class SD3(BaseModel):
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class AuraFlow(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.aura.mmdit.MMDiT)
@@ -732,6 +741,7 @@ class AuraFlow(BaseModel):
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class StableAudio1(BaseModel):
     def __init__(self, model_config, seconds_start_embedder_weights, seconds_total_embedder_weights, model_type=ModelType.V_PREDICTION_CONTINUOUS, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.audio.dit.AudioDiffusionTransformer)
@@ -770,6 +780,7 @@ class StableAudio1(BaseModel):
                 sd["{}{}".format(k, l)] = s[l]
         return sd
 
+
 class HunyuanDiT(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hydit.models.HunYuanDiT)
@@ -903,6 +914,7 @@ class Flux(BaseModel):
             out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
         return out
 
+
 class GenmoMochi(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
@@ -1154,6 +1166,7 @@ class WAN21(BaseModel):
 
         return out
 
+
 class WAN21_Vace(WAN21):
     def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
         super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel)

@@ -8,8 +8,6 @@ from typing import Optional
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
 
-logger = logging.getLogger(__name__)
-
 
 class TextEncodeQwenImageEdit(io.ComfyNode):
     @classmethod
@@ -186,8 +184,8 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         width = latent_width * 8
 
         if pad_h > 0 or pad_w > 0:
-            logger.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
-            logger.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
+            logging.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
+            logging.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
 
         # Collect entity prompts and masks
         entity_prompts = [entity_prompt_1, entity_prompt_2, entity_prompt_3]
@@ -202,7 +200,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         # Log warning if some entities were skipped
         total_prompts_provided = len([p for p in entity_prompts if p.strip()])
         if len(valid_entities) < total_prompts_provided:
-            logger.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
+            logging.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
 
         # If no valid entities, return standard conditioning
         if len(valid_entities) == 0:
@@ -244,7 +242,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             )
 
             # Log original mask statistics
-            logger.debug(
+            logging.debug(
                 f"[EliGen] Entity {i+1} input mask: shape={mask_tensor.shape}, "
                 f"dtype={mask_tensor.dtype}, min={mask_tensor.min():.4f}, max={mask_tensor.max():.4f}"
             )
@@ -260,7 +258,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
 
             # Check for constant masks (no variation)
             if mask_tensor.min() == mask_tensor.max() and mask_tensor.max() > 0:
-                logger.warning(
+                logging.warning(
                     f"[EliGen] Entity {i+1} mask has no variation (all pixels = {mask_tensor.min():.4f}). "
                     f"This entity will affect the entire image."
                 )
@@ -284,12 +282,12 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             # Log size mismatch if mask doesn't match expected latent dimensions
            expected_h, expected_w = latent_height * 8, latent_width * 8
            if orig_h != expected_h or orig_w != expected_w:
-                logger.info(
+                logging.info(
                    f"[EliGen] Entity {i+1} mask size mismatch: {orig_h}x{orig_w} vs expected {expected_h}x{expected_w}. "
                    f"Will resize to {latent_height}x{latent_width} latent space."
                )
            else:
-                logger.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
+                logging.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
 
            # Convert MASK format [batch, height, width] to [batch, 1, height, width] for common_upscale
            # common_upscale expects [batch, channels, height, width]
@@ -319,7 +317,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
                    f"Original mask may have been too small or all black."
                )
 
-            logger.debug(
+            logging.debug(
                f"[EliGen] Entity {i+1} mask coverage: {active_pixels}/{total_pixels} pixels ({coverage_pct:.1f}%)"
            )
 
@@ -332,7 +330,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
        entity_masks_tensor = torch.stack(processed_entity_masks_no_batch, dim=0)  # [num_entities, 1, H, W]
        entity_masks_tensor = entity_masks_tensor.unsqueeze(0)  # [1, num_entities, 1, H, W]
 
-        logger.debug(
+        logging.debug(
            f"[EliGen] Stacked {len(valid_entities)} entity masks into tensor: "
            f"shape={entity_masks_tensor.shape} (expected: [1, {len(valid_entities)}, 1, {latent_height}, {latent_width}])"
        )
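For context on the EliGen mask handling that the hunks above log about, here is a self-contained sketch of the two steps they reference: resizing a ComfyUI MASK tensor to the latent grid and zero-padding it when the latent itself was padded. The function name, the use of torch.nn.functional.interpolate in place of the node's common_upscale call, and the argument layout are assumptions for illustration, not the node's actual code.

import torch
import torch.nn.functional as F

def prepare_entity_mask(mask: torch.Tensor, latent_h: int, latent_w: int,
                        padded_h: int, padded_w: int) -> torch.Tensor:
    # MASK tensors arrive as [batch, H, W]; add a channel axis so the resize
    # sees [batch, 1, H, W] (the hunk's comment notes common_upscale expects
    # [batch, channels, height, width]).
    mask = mask.unsqueeze(1)
    mask = F.interpolate(mask, size=(latent_h, latent_w), mode="bilinear", align_corners=False)

    # If the latent was padded, pad the mask with zeros on the right/bottom,
    # mirroring torch.nn.functional.pad(entity_masks, (0, pad_w, 0, pad_h), value=0)
    # in the model-side hunk.
    pad_h, pad_w = padded_h - latent_h, padded_w - latent_w
    if pad_h > 0 or pad_w > 0:
        mask = F.pad(mask, (0, pad_w, 0, pad_h), mode="constant", value=0)
    return mask

# Example: a 512x512 mask prepared for a 64x64 latent that was padded to 64x66.
mask = torch.rand(1, 512, 512)
print(prepare_entity_mask(mask, 64, 64, 64, 66).shape)  # torch.Size([1, 1, 64, 66])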