Compare commits

...

5 Commits

Author SHA1 Message Date
Yousef R. Gamaleldin
a2c75abe57
Merge 96d0cfe0d7 into c5ecd231a2 2026-05-08 17:02:56 +00:00
Yousef Rafat
96d0cfe0d7 . 2026-05-08 20:02:09 +03:00
Alexis Rolland
c5ecd231a2
fix: Fix bug when mask not on same device (CORE-181) (#13801)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
2026-05-08 23:06:29 +08:00
drozbay
9864f5ac86
fix: Stop LTXVImgToVideoInplace from mutating input latents and dropping noise_mask (#13793) 2026-05-08 23:02:17 +08:00
drozbay
05cd076bc1
fix: Make LTXVAddGuide center-crop guide images to match other LTXV nodes (#13794) 2026-05-08 22:48:59 +08:00
4 changed files with 6 additions and 54 deletions

View File

@ -47,7 +47,7 @@ class BackgroundRemovalModel():
out = self.model(pixel_values=pixel_values)
out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False)
mask = out.sigmoid()
mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
if mask.ndim == 3:
mask = mask.unsqueeze(0)
if mask.shape[1] != 1:

View File

@ -7,50 +7,6 @@ import logging
import comfy.nested_tensor
def prepare_noise_inner(latent_image, generator, noise_inds=None):
coord_counts = getattr(latent_image, "trellis_coord_counts", None)
if coord_counts is not None:
if coord_counts.ndim != 1:
raise ValueError(f"Trellis2 coord_counts must be 1D, got shape {tuple(coord_counts.shape)}")
if coord_counts.shape[0] != latent_image.size(0):
raise ValueError(
f"Trellis2 coord_counts length {coord_counts.shape[0]} does not match latent batch {latent_image.size(0)}"
)
if (coord_counts < 0).any() or (coord_counts > latent_image.size(2)).any():
raise ValueError(
f"Trellis2 coord_counts must be within [0, {latent_image.size(2)}], got {coord_counts.tolist()}"
)
noise = torch.zeros(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, device="cpu")
if noise_inds is None:
noise_inds = np.arange(latent_image.size(0), dtype=np.int64)
else:
noise_inds = np.asarray(noise_inds, dtype=np.int64)
if noise_inds.shape[0] != latent_image.size(0):
raise ValueError(
f"Trellis2 noise_inds length {noise_inds.shape[0]} does not match latent batch {latent_image.size(0)}"
)
base_seed = int(generator.initial_seed())
unique_inds = np.unique(noise_inds)
sample_noises = {}
for noise_index in unique_inds.tolist():
rows = np.flatnonzero(noise_inds == noise_index)
max_count = max(int(coord_counts[row].item()) for row in rows.tolist())
local_generator = torch.Generator(device="cpu")
local_generator.manual_seed(base_seed + int(noise_index))
sample_noises[int(noise_index)] = torch.randn(
[1, latent_image.size(1), max_count, latent_image.size(3)],
dtype=torch.float32,
layout=latent_image.layout,
generator=local_generator,
device="cpu",
)
for batch_index, noise_index in enumerate(noise_inds.tolist()):
count = int(coord_counts[batch_index].item())
sample_noise = sample_noises[int(noise_index)]
noise[batch_index:batch_index + 1, :, :count, :] = sample_noise[:, :, :count, :]
return noise.to(dtype=latent_image.dtype)
if noise_inds is None:
return torch.randn(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)

View File

@ -203,7 +203,7 @@ class JoinImageWithAlpha(io.ComfyNode):
@classmethod
def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
    """Attach *alpha* to *image* as the output's alpha channel.

    The mask is resized to the image's spatial shape, inverted (masks are
    stored inverted relative to alpha), and both tensors are repeated to a
    common batch size before being concatenated into an RGBA result.

    Args:
        image: Image batch tensor; only the first three channels are kept.
        alpha: Mask tensor; may live on a different device than *image*.

    Returns:
        io.NodeOutput wrapping the joined RGBA tensor.
    """
    batch_size = max(len(image), len(alpha))
    # Move the mask onto the image's device/dtype *before* resizing so the
    # arithmetic below never mixes devices (CORE-181 fix). Note: compute the
    # inverted alpha exactly once — a second inversion would undo the first.
    alpha = 1.0 - resize_mask(alpha.to(image), image.shape[1:])
    alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size)
    image = comfy.utils.repeat_to_batch_size(image, batch_size)
    # Drop any existing alpha from the image and append the new channel.
    return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1))

View File

@ -106,12 +106,12 @@ class LTXVImgToVideoInplace(io.ComfyNode):
if bypass:
return (latent,)
samples = latent["samples"]
samples = latent["samples"].clone()
_, height_scale_factor, width_scale_factor = (
vae.downscale_index_formula
)
batch, _, latent_frames, latent_height, latent_width = samples.shape
_, _, _, latent_height, latent_width = samples.shape
width = latent_width * width_scale_factor
height = latent_height * height_scale_factor
@ -124,11 +124,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
samples[:, :, :t.shape[2]] = t
conditioning_latent_frames_mask = torch.ones(
(batch, 1, latent_frames, 1, 1),
dtype=torch.float32,
device=samples.device,
)
conditioning_latent_frames_mask = get_noise_mask(latent)
conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength
return io.NodeOutput({"samples": samples, "noise_mask": conditioning_latent_frames_mask})
@ -236,7 +232,7 @@ class LTXVAddGuide(io.ComfyNode):
def encode(cls, vae, latent_width, latent_height, images, scale_factors):
    """Upscale guide *images* to the target latent resolution and VAE-encode them.

    Args:
        vae: VAE used to encode the prepared pixels.
        latent_width: Target latent-space width.
        latent_height: Target latent-space height.
        images: Guide image batch, channels-last.
        scale_factors: (time, width, height) pixel-per-latent scale factors.

    Returns:
        Tuple of (encode_pixels, t): the RGB pixels actually encoded and the
        resulting latent tensor from ``vae.encode``.
    """
    time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
    # Trim so the frame count is a multiple of the temporal scale factor
    # plus one, as the temporal compression requires.
    images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
    # crop="center" (not "disabled") so aspect-ratio handling matches the
    # other LTXV nodes; computed exactly once — no stale duplicate pass.
    pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="center").movedim(1, -1)
    # Only the RGB channels are encoded; any alpha channel is discarded.
    encode_pixels = pixels[:, :, :, :3]
    t = vae.encode(encode_pixels)
    return encode_pixels, t