mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-04-23 08:52:32 +08:00
Initial void pass 2 commit.
This commit is contained in:
parent
a0a69c9b34
commit
b4518ea697
@ -146,11 +146,13 @@ class VOIDInpaintConditioning(io.ComfyNode):
|
|||||||
|
|
||||||
inpaint_latents = torch.cat([mask_latents, masked_video_latents], dim=1)
|
inpaint_latents = torch.cat([mask_latents, masked_video_latents], dim=1)
|
||||||
|
|
||||||
# CogVideoX.concat_cond() applies process_latent_in (x scale_factor) to
|
# CogVideoX-Fun was trained with Diffusers convention where VAE latents
|
||||||
# concat_latent_image before feeding it to the transformer. Pre-divide here
|
# are scaled by 0.7 (vae.config.scaling_factor). CogVideoX.concat_cond()
|
||||||
# so the net scaling is identity — the VOID model expects raw VAE latents.
|
# applies process_latent_in (×sf=1.153) to the stored conditioning.
|
||||||
scale_factor = comfy.latent_formats.CogVideoX().scale_factor
|
# Pre-multiply by 0.7 so the model sees the correct magnitude:
|
||||||
inpaint_latents = inpaint_latents / scale_factor
|
# stored = vae_output × 0.7 → after process_in: (vae_output×0.7)×sf = raw×0.7
|
||||||
|
DIFFUSERS_SCALING_FACTOR = 0.7
|
||||||
|
inpaint_latents = inpaint_latents * DIFFUSERS_SCALING_FACTOR
|
||||||
|
|
||||||
positive = node_helpers.conditioning_set_values(
|
positive = node_helpers.conditioning_set_values(
|
||||||
positive, {"concat_latent_image": inpaint_latents}
|
positive, {"concat_latent_image": inpaint_latents}
|
||||||
@ -167,12 +169,151 @@ class VOIDInpaintConditioning(io.ComfyNode):
|
|||||||
return io.NodeOutput(positive, negative, {"samples": noise_latent})
|
return io.NodeOutput(positive, negative, {"samples": noise_latent})
|
||||||
|
|
||||||
|
|
||||||
|
class VOIDWarpedNoise(io.ComfyNode):
    """Generate optical-flow warped noise for VOID Pass 2 refinement.

    Takes the Pass 1 output video and produces temporally-correlated noise
    by warping Gaussian noise along optical flow vectors. This noise is used
    as the initial latent for Pass 2, resulting in better temporal consistency.

    Requires: pip install rp (auto-installs Go-with-the-Flow dependencies)
    """

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs (video + target dimensions) and its
        single warped-noise LATENT output."""
        return io.Schema(
            node_id="VOIDWarpedNoise",
            category="latent/video",
            inputs=[
                io.Image.Input("video", tooltip="Pass 1 output video frames [T, H, W, 3]"),
                io.Int.Input("width", default=672, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("height", default=384, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("length", default=49, min=1, max=nodes.MAX_RESOLUTION, step=1,
                             tooltip="Number of pixel frames"),
                io.Int.Input("batch_size", default=1, min=1, max=64),
            ],
            outputs=[
                io.Latent.Output(display_name="warped_noise"),
            ],
        )

    @classmethod
    def execute(cls, video, width, height, length, batch_size) -> io.NodeOutput:
        """Warp Gaussian noise along the video's optical flow and return it
        as a latent-shaped tensor dict ``{"samples": (B, C, T, H, W)}``.

        Raises:
            RuntimeError: if the optional 'rp' package is not installed.
        """
        import numpy as np

        try:
            import rp
            # Allow rp to auto-install its Go-with-the-Flow dependencies.
            rp.r._pip_import_autoyes = True
            rp.git_import('CommonSource')
            import rp.git.CommonSource.noise_warp as nw
        except ImportError as e:
            # Chain the original error so the actual missing module is
            # visible in the traceback instead of being swallowed.
            raise RuntimeError(
                "VOIDWarpedNoise requires the 'rp' package. Install with: pip install rp"
            ) from e

        # Latent grid size: 8x spatial downscale and 4x temporal compression
        # with one leading key frame (presumably matching the video VAE used
        # by this model family — TODO confirm against the VAE config).
        temporal_compression = 4
        latent_t = ((length - 1) // temporal_compression) + 1
        latent_h = height // 8
        latent_w = width // 8

        # Convert the first `length` frames to uint8 HWC images for rp.
        vid = video[:length].cpu().numpy()
        vid_uint8 = (vid * 255).clip(0, 255).astype(np.uint8)

        frames = [vid_uint8[i] for i in range(vid_uint8.shape[0])]
        frames = rp.resize_images_to_hold(frames, height=height, width=width)
        frames = rp.crop_images(frames, height=height, width=width, origin='center')
        frames = rp.as_numpy_array(frames)

        # Scale factors handed to the warper: frames are processed at 1/2
        # resolution, flow at 8x; downscale_factor folds both back so the
        # warped noise lands at the 8x-downscaled latent resolution.
        FRAME = 2**-1
        FLOW = 2**3
        LATENT_SCALE = 8

        warp_output = nw.get_noise_from_video(
            frames,
            remove_background=False,
            visualize=False,
            save_files=False,
            noise_channels=16,
            output_folder=None,
            resize_frames=FRAME,
            resize_flow=FLOW,
            downscale_factor=round(FRAME * FLOW) * LATENT_SCALE,
        )

        warped_np = warp_output.numpy_noises  # (T, H, W, C)
        if warped_np.dtype == np.float16:
            # cv2.resize below does not accept float16.
            warped_np = warped_np.astype(np.float32)

        import cv2

        # Temporally resample to the latent frame count if they differ.
        if warped_np.shape[0] != latent_t:
            indices = np.linspace(0, warped_np.shape[0] - 1, latent_t).astype(int)
            warped_np = warped_np[indices]

        # Spatially resize, channel by channel, to the latent resolution.
        if warped_np.shape[1] != latent_h or warped_np.shape[2] != latent_w:
            resized = []
            for t_idx in range(latent_t):
                frame = warped_np[t_idx]
                ch_resized = [
                    cv2.resize(frame[:, :, c], (latent_w, latent_h),
                               interpolation=cv2.INTER_LINEAR)
                    for c in range(frame.shape[2])
                ]
                resized.append(np.stack(ch_resized, axis=2))
            warped_np = np.stack(resized, axis=0)

        # (T, H, W, C) -> (B, C, T, H, W)
        warped_tensor = torch.from_numpy(
            warped_np.transpose(3, 0, 1, 2)
        ).float().unsqueeze(0)

        if batch_size > 1:
            warped_tensor = warped_tensor.repeat(batch_size, 1, 1, 1, 1)

        warped_tensor = warped_tensor.to(comfy.model_management.intermediate_device())

        return io.NodeOutput({"samples": warped_tensor})
|
||||||
|
|
||||||
|
|
||||||
|
class Noise_FromLatent:
    """Adapt a pre-computed LATENT dict to the sampler NOISE interface.

    Stores the latent's ``samples`` tensor and hands back a fresh CPU copy
    every time noise is requested.
    """

    def __init__(self, latent_dict):
        # The noise is fully determined by the stored tensor, so the seed
        # attribute (required by the NOISE interface) is a fixed placeholder.
        self.seed = 0
        self._samples = latent_dict["samples"]

    def generate_noise(self, input_latent):
        # The incoming latent is intentionally ignored; return a copy so
        # callers cannot mutate the stored tensor in place.
        samples_copy = self._samples.clone()
        return samples_copy.cpu()
|
||||||
|
|
||||||
|
|
||||||
|
class VOIDWarpedNoiseSource(io.ComfyNode):
    """Convert a LATENT (e.g. from VOIDWarpedNoise) into a NOISE source
    for use with SamplerCustomAdvanced."""

    @classmethod
    def define_schema(cls):
        """Schema: one LATENT input, one NOISE output."""
        latent_input = io.Latent.Input(
            "warped_noise",
            tooltip="Warped noise latent from VOIDWarpedNoise",
        )
        return io.Schema(
            node_id="VOIDWarpedNoiseSource",
            category="sampling/custom_sampling/noise",
            inputs=[latent_input],
            outputs=[io.Noise.Output()],
        )

    @classmethod
    def execute(cls, warped_noise) -> io.NodeOutput:
        """Wrap the latent dict in a Noise_FromLatent adapter."""
        noise_source = Noise_FromLatent(warped_noise)
        return io.NodeOutput(noise_source)
|
||||||
|
|
||||||
|
|
||||||
class VOIDExtension(ComfyExtension):
    """Extension entry point that registers the VOID node classes."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        # Order mirrors the two-pass workflow: preprocessing, conditioning,
        # then the Pass 2 warped-noise nodes.
        node_classes = [
            VOIDQuadmaskPreprocess,
            VOIDInpaintConditioning,
            VOIDWarpedNoise,
            VOIDWarpedNoiseSource,
        ]
        return node_classes
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user