mirror of https://github.com/comfyanonymous/ComfyUI.git
synced 2025-12-20 19:42:59 +08:00

commit 16adfe2153
parent 2d550102fc

    restored whitespace and fixed logging

@@ -12,8 +12,6 @@ from comfy.ldm.flux.layers import EmbedND
 import comfy.ldm.common_dit
 import comfy.patcher_extension
 
-logger = logging.getLogger(__name__)
-
 
 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
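
This hunk sets the pattern for the whole commit: the per-module `logger = logging.getLogger(__name__)` objects are removed and the call sites below revert to bare `logging.*` calls on the root logger, matching the bare calls used elsewhere in the codebase. A minimal sketch of the two styles (the `basicConfig` line stands in for whatever logging setup the application itself does):

    import logging

    logging.basicConfig(level=logging.DEBUG)  # stand-in for the app's own setup

    # Removed style: a named per-module logger object.
    logger = logging.getLogger(__name__)
    logger.debug("per-module logger message")

    # Restored style: bare calls on the root logger.
    logging.debug("root logger message")
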
@@ -401,7 +399,7 @@ class QwenImageTransformer2DModel(nn.Module):
         has_negative = cond_or_uncond and 1 in cond_or_uncond
         is_cfg_batched = has_positive and has_negative
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Processing {num_entities} entities for {height}x{width}px, "
             f"batch_size={actual_batch_size}, CFG_batched={is_cfg_batched}"
         )
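
For context, the `cond_or_uncond` list tags each batch element: 0 for conditional (positive) and 1 for unconditional (negative), which is what the `1 in cond_or_uncond` test relies on. A toy version of the detection, with invented values:

    def cfg_batched(cond_or_uncond):
        # 0 = conditional (positive), 1 = unconditional (negative),
        # following the convention visible in the hunk above.
        has_positive = bool(cond_or_uncond and 0 in cond_or_uncond)
        has_negative = bool(cond_or_uncond and 1 in cond_or_uncond)
        return has_positive and has_negative

    print(cfg_batched([0, 1]))  # True: positive and negative share one batch
    print(cfg_batched([0]))     # False: positive-only pass
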
@@ -457,7 +455,7 @@ class QwenImageTransformer2DModel(nn.Module):
 
         img_rope = self.pe_embedder(img_ids).squeeze(1).squeeze(0)
 
-        logger.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
+        logging.debug(f"[EliGen Model] RoPE shapes - img: {img_rope.shape}, txt: {txt_rotary_emb.shape}")
 
         # Concatenate text and image RoPE embeddings
         # Convert to latent dtype to match queries/keys
@@ -473,7 +471,7 @@ class QwenImageTransformer2DModel(nn.Module):
         if entity_masks.shape[3] != padded_h or entity_masks.shape[4] != padded_w:
             pad_h = padded_h - entity_masks.shape[3]
             pad_w = padded_w - entity_masks.shape[4]
-            logger.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
+            logging.debug(f"[EliGen Model] Padding masks by ({pad_h}, {pad_w})")
             entity_masks = torch.nn.functional.pad(entity_masks, (0, pad_w, 0, pad_h), mode='constant', value=0)
 
         entity_masks = [entity_masks[:, i, None].squeeze(1) for i in range(max_masks)]
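
`torch.nn.functional.pad` consumes pad pairs from the last dimension inward, so `(0, pad_w, 0, pad_h)` pads the width on the right and the height at the bottom, matching the `shape[3]`/`shape[4]` checks above. A toy shape check:

    import torch
    import torch.nn.functional as F

    masks = torch.ones(1, 2, 1, 30, 28)   # toy stand-in for [B, entities, 1, H, W]
    padded_h, padded_w = 32, 32
    pad_h = padded_h - masks.shape[3]     # 2
    pad_w = padded_w - masks.shape[4]     # 4
    # Pairs apply last-dim-first: (left, right) for W, then (top, bottom) for H.
    masks = F.pad(masks, (0, pad_w, 0, pad_h), mode='constant', value=0)
    print(masks.shape)                    # torch.Size([1, 2, 1, 32, 32])
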
@@ -488,7 +486,7 @@ class QwenImageTransformer2DModel(nn.Module):
         seq_lens = entity_seq_lens + [global_seq_len]
         total_seq_len = int(sum(seq_lens) + image.shape[1])
 
-        logger.debug(f"[EliGen Model] total_seq={total_seq_len}")
+        logging.debug(f"[EliGen Model] total_seq={total_seq_len}")
 
         patched_masks = []
         for i in range(N):
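
The joint sequence the attention mask has to cover is every entity prompt, then the global prompt, then the image tokens. With invented token counts:

    entity_seq_lens = [12, 9]    # tokens per entity prompt (illustrative)
    global_seq_len = 20          # tokens in the global prompt
    image_tokens = 1024          # image.shape[1], e.g. a 32x32 patch grid
    seq_lens = entity_seq_lens + [global_seq_len]
    total_seq_len = int(sum(seq_lens) + image_tokens)
    print(total_seq_len)         # 1065
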
@@ -557,7 +555,7 @@ class QwenImageTransformer2DModel(nn.Module):
             # CFG batch: [positive, negative] - need different masks for each
             # Positive gets entity constraints, negative gets standard attention (all zeros)
 
-            logger.debug(
+            logging.debug(
                 "[EliGen Model] CFG batched detected - creating separate masks. "
                 "Positive (index 0) gets entity mask, Negative (index 1) gets standard mask"
             )
@@ -576,7 +574,7 @@ class QwenImageTransformer2DModel(nn.Module):
             # Concatenate masks to match batch
             attention_mask = torch.cat(mask_list, dim=0)
 
-            logger.debug(
+            logging.debug(
                 f"[EliGen Model] Created {len(mask_list)} masks for CFG batch. "
                 f"Final shape: {attention_mask.shape}"
             )
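
Per the comment two hunks up, the positive half keeps the entity-constrained mask while the negative half gets standard attention ("all zeros"), which reads as an additive mask convention: 0 keeps a connection, -inf blocks it. A toy sketch under that assumption (shapes and region are invented):

    import torch

    seq = 8
    # Additive attention masks: 0.0 keeps a connection, -inf blocks it.
    entity_mask = torch.full((1, seq, seq), float("-inf"))
    entity_mask[:, :4, :4] = 0.0              # positive half: constrained region
    standard_mask = torch.zeros(1, seq, seq)  # negative half: "all zeros"
    attention_mask = torch.cat([entity_mask, standard_mask], dim=0)
    print(attention_mask.shape)               # torch.Size([2, 8, 8]), one per CFG half
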
@@ -584,7 +582,7 @@ class QwenImageTransformer2DModel(nn.Module):
         # Add head dimension: [B, 1, seq, seq]
         attention_mask = attention_mask.unsqueeze(1)
 
-        logger.debug(
+        logging.debug(
             f"[EliGen Model] Attention mask created: shape={attention_mask.shape}, "
             f"valid_connections={num_valid_connections}/{total_seq_len * total_seq_len}"
         )
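
The `unsqueeze(1)` adds a singleton head axis so a single [B, seq, seq] mask can serve every attention head via broadcasting. A standalone shape check with scaled-dot-product attention (an additive float mask is assumed here):

    import torch
    import torch.nn.functional as F

    batch, heads, seq, dim = 2, 4, 8, 16
    q = k = v = torch.randn(batch, heads, seq, dim)
    attention_mask = torch.zeros(batch, seq, seq)  # additive mask, 0 = keep
    attention_mask = attention_mask.unsqueeze(1)   # [B, 1, seq, seq]
    # The singleton head axis broadcasts across all 4 heads.
    out = F.scaled_dot_product_attention(q, k, v, attn_mask=attention_mask)
    print(out.shape)                               # torch.Size([2, 4, 8, 16])
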
@@ -654,7 +652,7 @@ class QwenImageTransformer2DModel(nn.Module):
         batch_size = x.shape[0]
 
         if entity_prompt_emb is not None:
-            logger.debug(
+            logging.debug(
                 f"[EliGen Forward] batch_size={batch_size}, cond_or_uncond={cond_or_uncond}, "
                 f"has_positive={is_positive_cond}, has_negative={is_negative_cond}"
             )
@@ -119,6 +119,7 @@ def convert_tensor(extra, dtype, device):
             extra = comfy.model_management.cast_to_device(extra, device, None)
     return extra
 
+
 class BaseModel(torch.nn.Module):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_model=UNetModel):
         super().__init__()
@@ -380,6 +381,7 @@ class BaseModel(torch.nn.Module):
     def extra_conds_shapes(self, **kwargs):
         return {}
 
+
 def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None):
     adm_inputs = []
     weights = []
@@ -475,6 +477,7 @@ class SDXL(BaseModel):
         flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
         return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
 
+
 class SVD_img2vid(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -551,6 +554,7 @@ class SV3D_p(SVD_img2vid):
         out = list(map(lambda a: utils.resize_to_batch_size(a, noise.shape[0]), out))
         return torch.cat(out, dim=1)
 
+
 class Stable_Zero123(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
         super().__init__(model_config, model_type, device=device)
@@ -634,11 +638,13 @@ class IP2P:
         image = utils.resize_to_batch_size(image, noise.shape[0])
         return self.process_ip2p_image_in(image)
 
+
 class SD15_instructpix2pix(IP2P, BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
         self.process_ip2p_image_in = lambda image: image
 
+
 class SDXL_instructpix2pix(IP2P, SDXL):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
@@ -688,6 +694,7 @@ class StableCascade_C(BaseModel):
         out['clip_text'] = comfy.conds.CONDCrossAttn(cross_attn)
         return out
 
+
 class StableCascade_B(BaseModel):
     def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=StageB)
@@ -707,6 +714,7 @@ class StableCascade_B(BaseModel):
         out["sca"] = comfy.conds.CONDRegular(torch.zeros((1,)))
         return out
 
+
 class SD3(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=OpenAISignatureMMDITWrapper)
@@ -721,6 +729,7 @@ class SD3(BaseModel):
             out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class AuraFlow(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.aura.mmdit.MMDiT)
@@ -732,6 +741,7 @@ class AuraFlow(BaseModel):
             out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
+
 class StableAudio1(BaseModel):
     def __init__(self, model_config, seconds_start_embedder_weights, seconds_total_embedder_weights, model_type=ModelType.V_PREDICTION_CONTINUOUS, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.audio.dit.AudioDiffusionTransformer)
@@ -770,6 +780,7 @@ class StableAudio1(BaseModel):
                 sd["{}{}".format(k, l)] = s[l]
         return sd
 
+
 class HunyuanDiT(BaseModel):
     def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hydit.models.HunYuanDiT)
@@ -903,6 +914,7 @@ class Flux(BaseModel):
             out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
         return out
 
+
 class GenmoMochi(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
@@ -1154,6 +1166,7 @@ class WAN21(BaseModel):
 
         return out
 
+
 class WAN21_Vace(WAN21):
     def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
         super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel)
@@ -8,8 +8,6 @@ from typing import Optional
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
 
-logger = logging.getLogger(__name__)
-
 
 class TextEncodeQwenImageEdit(io.ComfyNode):
     @classmethod
@@ -186,8 +184,8 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         width = latent_width * 8
 
         if pad_h > 0 or pad_w > 0:
-            logger.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
-        logger.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
+            logging.debug(f"[EliGen] Latent padding detected: {unpadded_latent_height}x{unpadded_latent_width} → {latent_height}x{latent_width}")
+        logging.debug(f"[EliGen] Target generation dimensions: {height}x{width} pixels ({latent_height}x{latent_width} latent)")
 
         # Collect entity prompts and masks
         entity_prompts = [entity_prompt_1, entity_prompt_2, entity_prompt_3]
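
For orientation, the factor of 8 is the VAE downscale of SD-style latents, hence `width = latent_width * 8`; the `pad_h`/`pad_w` check catches latents that were rounded up past the requested pixel size. A hypothetical helper under those assumptions (the helper name and the rounding multiple are invented, not taken from the node):

    import math

    def latent_geometry(pixel_h, pixel_w, pad_to=2):
        unpadded_h = math.ceil(pixel_h / 8)   # VAE downscale factor of 8
        unpadded_w = math.ceil(pixel_w / 8)
        latent_h = math.ceil(unpadded_h / pad_to) * pad_to
        latent_w = math.ceil(unpadded_w / pad_to) * pad_to
        return latent_h, latent_w, latent_h - unpadded_h, latent_w - unpadded_w

    print(latent_geometry(1016, 1024))  # (128, 128, 1, 0): one row of padding
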
@@ -202,7 +200,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         # Log warning if some entities were skipped
         total_prompts_provided = len([p for p in entity_prompts if p.strip()])
         if len(valid_entities) < total_prompts_provided:
-            logger.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
+            logging.warning(f"[EliGen] Only {len(valid_entities)} of {total_prompts_provided} entity prompts have valid masks")
 
         # If no valid entities, return standard conditioning
         if len(valid_entities) == 0:
@@ -244,7 +242,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             )
 
             # Log original mask statistics
-            logger.debug(
+            logging.debug(
                 f"[EliGen] Entity {i+1} input mask: shape={mask_tensor.shape}, "
                 f"dtype={mask_tensor.dtype}, min={mask_tensor.min():.4f}, max={mask_tensor.max():.4f}"
             )
@@ -260,7 +258,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
 
             # Check for constant masks (no variation)
             if mask_tensor.min() == mask_tensor.max() and mask_tensor.max() > 0:
-                logger.warning(
+                logging.warning(
                     f"[EliGen] Entity {i+1} mask has no variation (all pixels = {mask_tensor.min():.4f}). "
                     f"This entity will affect the entire image."
                 )
@@ -284,12 +282,12 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
             # Log size mismatch if mask doesn't match expected latent dimensions
             expected_h, expected_w = latent_height * 8, latent_width * 8
             if orig_h != expected_h or orig_w != expected_w:
-                logger.info(
+                logging.info(
                     f"[EliGen] Entity {i+1} mask size mismatch: {orig_h}x{orig_w} vs expected {expected_h}x{expected_w}. "
                     f"Will resize to {latent_height}x{latent_width} latent space."
                 )
             else:
-                logger.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
+                logging.debug(f"[EliGen] Entity {i+1} mask: {orig_h}x{orig_w} → will resize to {latent_height}x{latent_width} latent")
 
             # Convert MASK format [batch, height, width] to [batch, 1, height, width] for common_upscale
             # common_upscale expects [batch, channels, height, width]
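
The two comments state the shape contract: ComfyUI MASK tensors are [batch, height, width], while common_upscale wants [batch, channels, height, width]. A self-contained sketch of that conversion, with `F.interpolate` standing in for `comfy.utils.common_upscale`:

    import torch
    import torch.nn.functional as F

    mask = torch.rand(1, 512, 512)          # MASK format: [batch, H, W]
    latent_h, latent_w = 64, 64
    mask_4d = mask.unsqueeze(1)             # [batch, 1, H, W] for resizing
    resized = F.interpolate(mask_4d, size=(latent_h, latent_w), mode="bilinear")
    mask_latent = resized.squeeze(1)        # back to [batch, H, W]
    print(mask_latent.shape)                # torch.Size([1, 64, 64])
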
@@ -319,7 +317,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
                     f"Original mask may have been too small or all black."
                 )
 
-            logger.debug(
+            logging.debug(
                 f"[EliGen] Entity {i+1} mask coverage: {active_pixels}/{total_pixels} pixels ({coverage_pct:.1f}%)"
             )
 
@@ -332,7 +330,7 @@ class TextEncodeQwenImageEliGen(io.ComfyNode):
         entity_masks_tensor = torch.stack(processed_entity_masks_no_batch, dim=0) # [num_entities, 1, H, W]
         entity_masks_tensor = entity_masks_tensor.unsqueeze(0) # [1, num_entities, 1, H, W]
 
-        logger.debug(
+        logging.debug(
             f"[EliGen] Stacked {len(valid_entities)} entity masks into tensor: "
             f"shape={entity_masks_tensor.shape} (expected: [1, {len(valid_entities)}, 1, {latent_height}, {latent_width}])"
         )
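
The stack/unsqueeze pair produces exactly the axes the debug message documents. A minimal shape check:

    import torch

    latent_h, latent_w = 64, 64
    masks = [torch.rand(1, latent_h, latent_w) for _ in range(3)]  # per-entity [1, H, W]
    stacked = torch.stack(masks, dim=0)   # [num_entities, 1, H, W]
    batched = stacked.unsqueeze(0)        # [1, num_entities, 1, H, W]
    print(batched.shape)                  # torch.Size([1, 3, 1, 64, 64])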