Compare commits

...

5 Commits

Author SHA1 Message Date
Yousef R. Gamaleldin
a2c75abe57
Merge 96d0cfe0d7 into c5ecd231a2 2026-05-08 17:02:56 +00:00
Yousef Rafat
96d0cfe0d7 . 2026-05-08 20:02:09 +03:00
Alexis Rolland
c5ecd231a2
fix: Fix bug when mask not on same device (CORE-181) (#13801)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
2026-05-08 23:06:29 +08:00
drozbay
9864f5ac86
fix: Stop LTXVImgToVideoInplace from mutating input latents and dropping noise_mask (#13793) 2026-05-08 23:02:17 +08:00
drozbay
05cd076bc1
fix: Make LTXVAddGuide center-crop guide images to match other LTXV nodes (#13794) 2026-05-08 22:48:59 +08:00
4 changed files with 6 additions and 54 deletions

View File

@ -47,7 +47,7 @@ class BackgroundRemovalModel():
out = self.model(pixel_values=pixel_values)
out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False)
mask = out.sigmoid()
mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
if mask.ndim == 3:
mask = mask.unsqueeze(0)
if mask.shape[1] != 1:

View File

@ -7,50 +7,6 @@ import logging
import comfy.nested_tensor
def prepare_noise_inner(latent_image, generator, noise_inds=None):
coord_counts = getattr(latent_image, "trellis_coord_counts", None)
if coord_counts is not None:
if coord_counts.ndim != 1:
raise ValueError(f"Trellis2 coord_counts must be 1D, got shape {tuple(coord_counts.shape)}")
if coord_counts.shape[0] != latent_image.size(0):
raise ValueError(
f"Trellis2 coord_counts length {coord_counts.shape[0]} does not match latent batch {latent_image.size(0)}"
)
if (coord_counts < 0).any() or (coord_counts > latent_image.size(2)).any():
raise ValueError(
f"Trellis2 coord_counts must be within [0, {latent_image.size(2)}], got {coord_counts.tolist()}"
)
noise = torch.zeros(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, device="cpu")
if noise_inds is None:
noise_inds = np.arange(latent_image.size(0), dtype=np.int64)
else:
noise_inds = np.asarray(noise_inds, dtype=np.int64)
if noise_inds.shape[0] != latent_image.size(0):
raise ValueError(
f"Trellis2 noise_inds length {noise_inds.shape[0]} does not match latent batch {latent_image.size(0)}"
)
base_seed = int(generator.initial_seed())
unique_inds = np.unique(noise_inds)
sample_noises = {}
for noise_index in unique_inds.tolist():
rows = np.flatnonzero(noise_inds == noise_index)
max_count = max(int(coord_counts[row].item()) for row in rows.tolist())
local_generator = torch.Generator(device="cpu")
local_generator.manual_seed(base_seed + int(noise_index))
sample_noises[int(noise_index)] = torch.randn(
[1, latent_image.size(1), max_count, latent_image.size(3)],
dtype=torch.float32,
layout=latent_image.layout,
generator=local_generator,
device="cpu",
)
for batch_index, noise_index in enumerate(noise_inds.tolist()):
count = int(coord_counts[batch_index].item())
sample_noise = sample_noises[int(noise_index)]
noise[batch_index:batch_index + 1, :, :count, :] = sample_noise[:, :, :count, :]
return noise.to(dtype=latent_image.dtype)
if noise_inds is None:
return torch.randn(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)

View File

@ -203,7 +203,7 @@ class JoinImageWithAlpha(io.ComfyNode):
@classmethod
def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
    """Attach *alpha* to *image* as the output's alpha channel.

    The mask is resized to the image's spatial shape, inverted (masks are
    stored inverted relative to alpha), and both tensors are repeated to a
    common batch size before being concatenated into an RGBA result.

    Args:
        image: Image batch tensor; only the first three channels are kept.
        alpha: Mask tensor; may live on a different device than *image*.

    Returns:
        io.NodeOutput wrapping the joined RGBA tensor.
    """
    batch_size = max(len(image), len(alpha))
    # Move the mask onto the image's device/dtype *before* resizing so the
    # arithmetic below never mixes devices (CORE-181 fix). Note: compute the
    # inverted alpha exactly once — a second inversion would undo the first.
    alpha = 1.0 - resize_mask(alpha.to(image), image.shape[1:])
    alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size)
    image = comfy.utils.repeat_to_batch_size(image, batch_size)
    # Drop any existing alpha from the image and append the new channel.
    return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1))

View File

@ -106,12 +106,12 @@ class LTXVImgToVideoInplace(io.ComfyNode):
if bypass:
return (latent,)
samples = latent["samples"]
samples = latent["samples"].clone()
_, height_scale_factor, width_scale_factor = (
vae.downscale_index_formula
)
batch, _, latent_frames, latent_height, latent_width = samples.shape
_, _, _, latent_height, latent_width = samples.shape
width = latent_width * width_scale_factor
height = latent_height * height_scale_factor
@ -124,11 +124,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
samples[:, :, :t.shape[2]] = t
conditioning_latent_frames_mask = torch.ones(
(batch, 1, latent_frames, 1, 1),
dtype=torch.float32,
device=samples.device,
)
conditioning_latent_frames_mask = get_noise_mask(latent)
conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength
return io.NodeOutput({"samples": samples, "noise_mask": conditioning_latent_frames_mask})
@ -236,7 +232,7 @@ class LTXVAddGuide(io.ComfyNode):
def encode(cls, vae, latent_width, latent_height, images, scale_factors):
    """Upscale guide *images* to the target latent resolution and VAE-encode them.

    Args:
        vae: VAE used to encode the prepared pixels.
        latent_width: Target latent-space width.
        latent_height: Target latent-space height.
        images: Guide image batch, channels-last.
        scale_factors: (time, width, height) pixel-per-latent scale factors.

    Returns:
        Tuple of (encode_pixels, t): the RGB pixels actually encoded and the
        resulting latent tensor from ``vae.encode``.
    """
    time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
    # Trim so the frame count is a multiple of the temporal scale factor
    # plus one, as the temporal compression requires.
    images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
    # crop="center" (not "disabled") so aspect-ratio handling matches the
    # other LTXV nodes; computed exactly once — no stale duplicate pass.
    pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="center").movedim(1, -1)
    # Only the RGB channels are encoded; any alpha channel is discarded.
    encode_pixels = pixels[:, :, :, :3]
    t = vae.encode(encode_pixels)
    return encode_pixels, t