Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-05-10 01:02:56 +08:00)

Compare commits: 6c38b75720...a2c75abe57 (5 commits)

Commits:
- a2c75abe57
- 96d0cfe0d7
- c5ecd231a2
- 9864f5ac86
- 05cd076bc1
```diff
@@ -47,7 +47,7 @@ class BackgroundRemovalModel():
         out = self.model(pixel_values=pixel_values)
         out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False)
 
-        mask = out.sigmoid()
+        mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
         if mask.ndim == 3:
             mask = mask.unsqueeze(0)
         if mask.shape[1] != 1:
```
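The change above casts the sigmoid mask onto ComfyUI's intermediate device and dtype in one `.to(...)` call, so downstream nodes receive the mask in a consistent place rather than on whatever device the model ran on. A minimal sketch of that pattern, with plain values standing in for `comfy.model_management.intermediate_device()` / `intermediate_dtype()` (which belong to ComfyUI, not this snippet):

```python
import torch

# Stand-ins for ComfyUI's intermediate device/dtype helpers (assumptions here).
def to_intermediate(mask: torch.Tensor,
                    device: torch.device = torch.device("cpu"),
                    dtype: torch.dtype = torch.float32) -> torch.Tensor:
    # One .to() call moves device and casts dtype together.
    return mask.to(device=device, dtype=dtype)

logits = torch.randn(1, 1, 64, 64)        # stand-in for the model output
mask = to_intermediate(logits.sigmoid())  # probabilities in [0, 1], cast once
if mask.ndim == 3:                        # same shape normalization as the diff
    mask = mask.unsqueeze(0)
print(mask.shape, mask.dtype, mask.device)
```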
```diff
@@ -7,50 +7,6 @@ import logging
 import comfy.nested_tensor
 
 def prepare_noise_inner(latent_image, generator, noise_inds=None):
-    coord_counts = getattr(latent_image, "trellis_coord_counts", None)
-    if coord_counts is not None:
-        if coord_counts.ndim != 1:
-            raise ValueError(f"Trellis2 coord_counts must be 1D, got shape {tuple(coord_counts.shape)}")
-        if coord_counts.shape[0] != latent_image.size(0):
-            raise ValueError(
-                f"Trellis2 coord_counts length {coord_counts.shape[0]} does not match latent batch {latent_image.size(0)}"
-            )
-        if (coord_counts < 0).any() or (coord_counts > latent_image.size(2)).any():
-            raise ValueError(
-                f"Trellis2 coord_counts must be within [0, {latent_image.size(2)}], got {coord_counts.tolist()}"
-            )
-        noise = torch.zeros(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, device="cpu")
-        if noise_inds is None:
-            noise_inds = np.arange(latent_image.size(0), dtype=np.int64)
-        else:
-            noise_inds = np.asarray(noise_inds, dtype=np.int64)
-            if noise_inds.shape[0] != latent_image.size(0):
-                raise ValueError(
-                    f"Trellis2 noise_inds length {noise_inds.shape[0]} does not match latent batch {latent_image.size(0)}"
-                )
-
-        base_seed = int(generator.initial_seed())
-        unique_inds = np.unique(noise_inds)
-        sample_noises = {}
-        for noise_index in unique_inds.tolist():
-            rows = np.flatnonzero(noise_inds == noise_index)
-            max_count = max(int(coord_counts[row].item()) for row in rows.tolist())
-            local_generator = torch.Generator(device="cpu")
-            local_generator.manual_seed(base_seed + int(noise_index))
-            sample_noises[int(noise_index)] = torch.randn(
-                [1, latent_image.size(1), max_count, latent_image.size(3)],
-                dtype=torch.float32,
-                layout=latent_image.layout,
-                generator=local_generator,
-                device="cpu",
-            )
-
-        for batch_index, noise_index in enumerate(noise_inds.tolist()):
-            count = int(coord_counts[batch_index].item())
-            sample_noise = sample_noises[int(noise_index)]
-            noise[batch_index:batch_index + 1, :, :count, :] = sample_noise[:, :, :count, :]
-        return noise.to(dtype=latent_image.dtype)
-
     if noise_inds is None:
         return torch.randn(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)
```
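The removed Trellis2 branch seeded one CPU generator per noise index (`base_seed + noise_index`) and reused that noise for every batch slot sharing the index, so a given index always produced the same noise regardless of batch composition. A minimal sketch of just that seeding scheme, with the Trellis-specific `coord_counts` padding left out:

```python
import torch

# Per-index seeding: each batch slot draws from its own CPU generator
# seeded with base_seed + index, so noise for a given index is reproducible
# no matter how the batch is sliced or reordered.
def per_index_noise(shape, base_seed: int, noise_inds) -> torch.Tensor:
    noise = torch.zeros((len(noise_inds),) + tuple(shape), dtype=torch.float32)
    for row, idx in enumerate(noise_inds):
        g = torch.Generator(device="cpu").manual_seed(base_seed + int(idx))
        noise[row] = torch.randn(shape, generator=g)
    return noise

a = per_index_noise((4, 8), base_seed=42, noise_inds=[0, 2])
b = per_index_noise((4, 8), base_seed=42, noise_inds=[2])
assert torch.equal(a[1], b[0])  # index 2 yields identical noise in both batches
```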
```diff
@@ -203,7 +203,7 @@ class JoinImageWithAlpha(io.ComfyNode):
     @classmethod
     def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
         batch_size = max(len(image), len(alpha))
-        alpha = 1.0 - resize_mask(alpha, image.shape[1:])
+        alpha = 1.0 - resize_mask(alpha.to(image), image.shape[1:])
         alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size)
         image = comfy.utils.repeat_to_batch_size(image, batch_size)
         return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1))
```
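The one-character fix here relies on the `Tensor.to(other)` overload: it converts a tensor to the dtype *and* device of `other` in a single call, which heads off mismatches when the mask and image arrive from different sources. A small sketch of that semantics:

```python
import torch

# A float64 CPU mask meeting a float32 image: .to(image) matches both
# dtype and device at once before the two are combined.
image = torch.rand(1, 32, 32, 3, dtype=torch.float32)
alpha = torch.rand(1, 32, 32, dtype=torch.float64)

matched = alpha.to(image)  # same dtype and device as `image`
assert matched.dtype == image.dtype and matched.device == image.device
```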
```diff
@@ -106,12 +106,12 @@ class LTXVImgToVideoInplace(io.ComfyNode):
         if bypass:
             return (latent,)
 
-        samples = latent["samples"]
+        samples = latent["samples"].clone()
         _, height_scale_factor, width_scale_factor = (
             vae.downscale_index_formula
         )
 
-        batch, _, latent_frames, latent_height, latent_width = samples.shape
+        _, _, _, latent_height, latent_width = samples.shape
         width = latent_width * width_scale_factor
         height = latent_height * height_scale_factor
```
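The `.clone()` is the substance of this hunk: without it, the later in-place write `samples[:, :, :t.shape[2]] = t` would also mutate the tensor stored in the caller's latent dict, since indexing a dict returns the same underlying tensor, not a copy. A toy demonstration of the aliasing:

```python
import torch

latent = {"samples": torch.zeros(1, 4, 8, 8, 8)}

aliased = latent["samples"]         # no copy: same underlying storage
aliased[:, :, :2] = 1.0             # caller's latent is silently modified
assert latent["samples"].sum() != 0

latent = {"samples": torch.zeros(1, 4, 8, 8, 8)}
cloned = latent["samples"].clone()  # independent copy
cloned[:, :, :2] = 1.0              # caller's latent stays untouched
assert latent["samples"].sum() == 0
```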
```diff
@@ -124,11 +124,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
 
         samples[:, :, :t.shape[2]] = t
 
-        conditioning_latent_frames_mask = torch.ones(
-            (batch, 1, latent_frames, 1, 1),
-            dtype=torch.float32,
-            device=samples.device,
-        )
+        conditioning_latent_frames_mask = get_noise_mask(latent)
         conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength
 
         return io.NodeOutput({"samples": samples, "noise_mask": conditioning_latent_frames_mask})
```
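The effect of this hunk is that a noise mask already attached to the incoming latent is reused instead of being overwritten by a fresh all-ones mask. A sketch under the assumption that `get_noise_mask(latent)` returns the latent's existing `"noise_mask"` when present and an all-ones mask otherwise; the stand-in below is hypothetical, the real helper lives in ComfyUI:

```python
import torch

def get_noise_mask(latent: dict) -> torch.Tensor:
    # Hypothetical stand-in: prefer the mask already on the latent.
    samples = latent["samples"]
    mask = latent.get("noise_mask")
    if mask is None:
        b, _, f, _, _ = samples.shape
        mask = torch.ones((b, 1, f, 1, 1), dtype=torch.float32, device=samples.device)
    return mask.clone()

latent = {
    "samples": torch.zeros(1, 8, 10, 4, 4),
    "noise_mask": torch.full((1, 1, 10, 1, 1), 0.5),  # upstream node already masked half
}
mask = get_noise_mask(latent)
mask[:, :, :3] = 1.0 - 0.8  # conditioning frames keep (1 - strength), as in the diff
print(mask.flatten()[:5])   # first 3 frames reflect strength; the rest keep 0.5
```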
```diff
@@ -236,7 +232,7 @@ class LTXVAddGuide(io.ComfyNode):
     def encode(cls, vae, latent_width, latent_height, images, scale_factors):
         time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
         images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
-        pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
+        pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="center").movedim(1, -1)
         encode_pixels = pixels[:, :, :, :3]
         t = vae.encode(encode_pixels)
         return encode_pixels, t
```
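Switching `crop="disabled"` to `crop="center"` trades stretching for aspect-ratio preservation: instead of resampling the image straight to the target size (distorting it when the ratios differ), the source is first center-cropped to the target aspect ratio and then resized. A sketch of that idea in plain PyTorch; the actual logic lives in `comfy.utils.common_upscale`, and this reimplementation is only illustrative:

```python
import torch
import torch.nn.functional as F

def center_crop_resize(images: torch.Tensor, width: int, height: int) -> torch.Tensor:
    # images: (N, C, H, W). Crop to the target aspect ratio around the
    # center, then resize, so content is not distorted.
    _, _, h, w = images.shape
    scale = max(width / w, height / h)
    crop_w, crop_h = round(width / scale), round(height / scale)
    x0, y0 = (w - crop_w) // 2, (h - crop_h) // 2
    cropped = images[:, :, y0:y0 + crop_h, x0:x0 + crop_w]
    return F.interpolate(cropped, size=(height, width), mode="bilinear")

imgs = torch.rand(1, 3, 480, 640)         # 4:3 source
out = center_crop_resize(imgs, 512, 512)  # square target: the sides get cropped
print(out.shape)                          # torch.Size([1, 3, 512, 512])
```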