feat: Add optional attention_mask input to LTXVAddGuide (CORE-220) (#13965)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run

This commit is contained in:
drozbay 2026-05-18 15:07:04 -06:00 committed by GitHub
parent 187e5237e1
commit 292814c31e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -175,7 +175,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
generate = execute # TODO: remove generate = execute # TODO: remove
def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0): def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0, attention_mask=None):
"""Append a guide_attention_entry to both positive and negative conditioning. """Append a guide_attention_entry to both positive and negative conditioning.
Each entry tracks one guide reference for per-reference attention control. Each entry tracks one guide reference for per-reference attention control.
@ -184,9 +184,10 @@ def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_s
new_entry = { new_entry = {
"pre_filter_count": pre_filter_count, "pre_filter_count": pre_filter_count,
"strength": strength, "strength": strength,
"pixel_mask": None, "pixel_mask": attention_mask.unsqueeze(0).unsqueeze(0) if attention_mask is not None else None, # reshape to (1, 1, F, H, W)
"latent_shape": latent_shape, "latent_shape": latent_shape,
} }
results = [] results = []
for cond in (positive, negative): for cond in (positive, negative):
# Read existing entries from this specific conditioning # Read existing entries from this specific conditioning
@ -196,8 +197,7 @@ def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_s
if found is not None: if found is not None:
existing = found existing = found
break break
# Shallow copy and append (no deepcopy needed — entries contain # Shallow copy only and append (pixel_mask is never mutated).
# only scalars and None for pixel_mask at this call site).
entries = [*existing, new_entry] entries = [*existing, new_entry]
results.append(node_helpers.conditioning_set_values( results.append(node_helpers.conditioning_set_values(
cond, {"guide_attention_entries": entries} cond, {"guide_attention_entries": entries}
@ -263,6 +263,12 @@ class LTXVAddGuide(io.ComfyNode):
"down to the nearest multiple of 8. Negative values are counted from the end of the video.", "down to the nearest multiple of 8. Negative values are counted from the end of the video.",
), ),
io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01), io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01),
io.Mask.Input(
"attention_mask",
optional=True,
tooltip="Optional pixel-space spatial mask. Controls per-region "
"conditioning influence via self-attention, multiplied by strength.",
),
ICLoRAParameters.Input( ICLoRAParameters.Input(
"iclora_parameters", "iclora_parameters",
optional=True, optional=True,
@ -410,7 +416,7 @@ class LTXVAddGuide(io.ComfyNode):
return latent_image, noise_mask return latent_image, noise_mask
@classmethod @classmethod
def execute(cls, positive, negative, vae, latent, image, frame_idx, strength, iclora_parameters=None) -> io.NodeOutput: def execute(cls, positive, negative, vae, latent, image, frame_idx, strength, attention_mask=None, iclora_parameters=None) -> io.NodeOutput:
scale_factors = vae.downscale_index_formula scale_factors = vae.downscale_index_formula
latent_image = latent["samples"] latent_image = latent["samples"]
noise_mask = get_noise_mask(latent) noise_mask = get_noise_mask(latent)
@ -469,6 +475,7 @@ class LTXVAddGuide(io.ComfyNode):
pre_filter_count = t.shape[2] * t.shape[3] * t.shape[4] pre_filter_count = t.shape[2] * t.shape[3] * t.shape[4]
positive, negative = _append_guide_attention_entry( positive, negative = _append_guide_attention_entry(
positive, negative, pre_filter_count, guide_latent_shape, strength=strength, positive, negative, pre_filter_count, guide_latent_shape, strength=strength,
attention_mask=attention_mask,
) )
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask}) return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})