diff --git a/.spectral.yaml b/.spectral.yaml
index 4bb4a4a94..a4b137628 100644
--- a/.spectral.yaml
+++ b/.spectral.yaml
@@ -89,3 +89,12 @@ rules:
     then:
       field: description
       function: truthy
+
+overrides:
+  # /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
+  # WebSocket upgrade, but not a 2xx, so operation-success-response fires
+  # as a false positive. OpenAPI 3.x has no native WebSocket support.
+  - files:
+      - "openapi.yaml#/paths/~1ws"
+    rules:
+      operation-success-response: off
diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index ab2fd0578..4bf2f0f30 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -1452,7 +1452,7 @@ class HiDreamO1(supported_models_base.BASE):
     }
 
     latent_format = latent_formats.HiDreamO1Pixel
-    memory_usage_factor = 0.6
+    memory_usage_factor = 0.033
 
     # fp16 not supported: LM MLP down_proj activations fp16 overflow, causing NaNs
     supported_inference_dtypes = [torch.bfloat16, torch.float32]
diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py
index 5f514716f..6382dd618 100644
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -297,6 +297,7 @@ class LoadAudio(IO.ComfyNode):
     @classmethod
     def define_schema(cls):
         input_dir = folder_paths.get_input_directory()
+        os.makedirs(input_dir, exist_ok=True)
         files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])
         return IO.Schema(
             node_id="LoadAudio",
diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py
index a4c85db77..3dc1199c2 100644
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@@ -338,8 +338,25 @@ class LTXVAddGuide(io.ComfyNode):
         noise_mask = get_noise_mask(latent)
 
         _, _, latent_length, latent_height, latent_width = latent_image.shape
+
+        # For mid-video multi-frame guides, prepend+strip a throwaway first frame so the VAE's "first latent = 1 pixel frame" asymmetry lands on the discarded slot
+        time_scale_factor = scale_factors[0]
+        num_frames_to_keep = ((image.shape[0] - 1) // time_scale_factor) * time_scale_factor + 1
+        resolved_frame_idx = frame_idx
+        if frame_idx < 0:
+            _, num_keyframes = get_keyframe_idxs(positive)
+            resolved_frame_idx = max((latent_length - num_keyframes - 1) * time_scale_factor + 1 + frame_idx, 0)
+        causal_fix = resolved_frame_idx == 0 or num_frames_to_keep == 1
+
+        if not causal_fix:
+            image = torch.cat([image[:1], image], dim=0)
+
         image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)
+
+        if not causal_fix:
+            t = t[:, :, 1:, :, :]
+            image = image[1:]
+
         frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
         assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
@@ -352,6 +369,7 @@ class LTXVAddGuide(io.ComfyNode):
             t,
             strength,
             scale_factors,
+            causal_fix=causal_fix,
         )
 
         # Track this guide for per-reference attention control.
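The LTXVAddGuide hunk above leans on one property the in-diff comment names: a causal VAE in which 1 + k * time_scale_factor pixel frames encode to 1 + k latents, the first latent covering only a single frame. A minimal sketch of the resulting index arithmetic under that assumption; `latents_for_guide` is a hypothetical helper for illustration, not part of the node:

```python
# Sketch only: assumes the causal-VAE mapping "1 + k*tsf pixel frames
# -> 1 + k latents", where the first latent covers a single frame.
def latents_for_guide(num_frames: int, starts_at_frame_0: bool, tsf: int = 8) -> int:
    if starts_at_frame_0 or num_frames == 1:
        # The short first-latent slot lines up with the start of the
        # video (or the guide is a single frame): no fix needed.
        return (num_frames - 1) // tsf + 1
    # Mid-video multi-frame guide: prepend one throwaway frame, encode,
    # then drop the first latent, so every kept latent covers a full
    # tsf-frame block instead of a misaligned single-frame slot.
    padded = num_frames + 1
    return ((padded - 1) // tsf + 1) - 1

assert latents_for_guide(17, starts_at_frame_0=True) == 3   # 1 + 8 + 8 frames
assert latents_for_guide(17, starts_at_frame_0=False) == 2  # two full 8-frame blocks
```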
diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py
index c9b2a84d9..96ee1a0f8 100644
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@@ -40,23 +40,13 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_sou
     mask = mask[:, :, :visible_height, :visible_width]
     inverse_mask = torch.ones_like(mask) - mask
 
-    source_rgb = source[:, :3, :visible_height, :visible_width]
-    dest_slice = destination[..., top:bottom, left:right]
-
-    if destination.shape[1] == 4:
-        if torch.max(dest_slice) == 0:
-            destination[:, :3, top:bottom, left:right] = source_rgb
-            destination[:, 3:4, top:bottom, left:right] = mask
-        else:
-            destination[:, :3, top:bottom, left:right] = (mask * source_rgb) + (inverse_mask * dest_slice[:, :3])
-            destination[:, 3:4, top:bottom, left:right] = torch.max(mask, dest_slice[:, 3:4])
-    else:
-        source_portion = mask * source_rgb
-        destination_portion = inverse_mask * dest_slice
-        destination[..., top:bottom, left:right] = source_portion + destination_portion
+    source_portion = mask * source[..., :visible_height, :visible_width]
+    destination_portion = inverse_mask * destination[..., top:bottom, left:right]
+    destination[..., top:bottom, left:right] = source_portion + destination_portion
 
     return destination
 
+
 class LatentCompositeMasked(IO.ComfyNode):
     @classmethod
     def define_schema(cls):
@@ -95,23 +85,18 @@ class ImageCompositeMasked(IO.ComfyNode):
             display_name="Image Composite Masked",
             category="image",
             inputs=[
+                IO.Image.Input("destination"),
                 IO.Image.Input("source"),
                 IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
                 IO.Int.Input("y", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
                 IO.Boolean.Input("resize_source", default=False),
-                IO.Image.Input("destination", optional=True),
                 IO.Mask.Input("mask", optional=True),
             ],
             outputs=[IO.Image.Output()],
         )
 
     @classmethod
-    def execute(cls, source, x, y, resize_source, destination = None, mask = None) -> IO.NodeOutput:
-        if destination is None: # transparent rgba
-            B, H, W, C = source.shape
-            destination = torch.zeros((B, H, W, 4), dtype=source.dtype, device=source.device)
-            if C == 3:
-                source = torch.nn.functional.pad(source, (0, 1), value=1.0)
+    def execute(cls, destination, source, x, y, resize_source, mask = None) -> IO.NodeOutput:
         destination, source = node_helpers.image_alpha_fix(destination, source)
         destination = destination.clone().movedim(-1, 1)
         output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1)
diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py
index 719acf2f1..78a2a28f8 100644
--- a/comfy_extras/nodes_video.py
+++ b/comfy_extras/nodes_video.py
@@ -123,6 +123,7 @@ class CreateVideo(io.ComfyNode):
             search_aliases=["images to video"],
             display_name="Create Video",
             category="video",
+            essentials_category="Video Tools",
             description="Create a video from images.",
             inputs=[
                 io.Image.Input("images", tooltip="The images to create a video from."),
diff --git a/requirements.txt b/requirements.txt
index c5a6f4cec..86c0a3c72 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 comfyui-frontend-package==1.43.18
 comfyui-workflow-templates==0.9.73
-comfyui-embedded-docs==0.4.4
+comfyui-embedded-docs==0.5.0
 torch
 torchsde
 torchvision
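For reference, the composite() rewrite in nodes_mask.py above reduces the blend to a single per-pixel linear interpolation applied uniformly to every channel; the RGBA branches become unnecessary once ImageCompositeMasked requires `destination`. A standalone sketch of just that blend (`composite_region` is a hypothetical name; the real function also handles offsets, resizing, batch repetition, and bounds clipping):

```python
import torch

def composite_region(destination: torch.Tensor, source: torch.Tensor,
                     mask: torch.Tensor) -> torch.Tensor:
    """Blend source over a same-sized destination region; mask == 1 keeps source."""
    inverse_mask = torch.ones_like(mask) - mask
    return mask * source + inverse_mask * destination

dst = torch.zeros(1, 3, 4, 4)           # black destination (NCHW)
src = torch.ones(1, 3, 4, 4)            # white source
mask = torch.full((1, 1, 4, 4), 0.25)   # 25% source everywhere
out = composite_region(dst, src, mask)  # every pixel == 0.25, all channels
```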