From acc152b674fd1c983acc6efd8aedbeb380660c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:06:54 +0200 Subject: [PATCH 1/2] Support loading and using SkyReels-V1-Hunyuan-I2V (#6862) * Support SkyReels-V1-Hunyuan-I2V * VAE scaling * Fix T2V oops * Proper latent scaling --- comfy/ldm/hunyuan_video/model.py | 2 +- comfy/model_base.py | 9 +++++++++ comfy/model_detection.py | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py index fc3a67444..f3f445843 100644 --- a/comfy/ldm/hunyuan_video/model.py +++ b/comfy/ldm/hunyuan_video/model.py @@ -310,7 +310,7 @@ class HunyuanVideo(nn.Module): shape[i] = shape[i] // self.patch_size[i] img = img.reshape([img.shape[0]] + shape + [self.out_channels] + self.patch_size) img = img.permute(0, 4, 1, 5, 2, 6, 3, 7) - img = img.reshape(initial_shape) + img = img.reshape(initial_shape[0], self.out_channels, initial_shape[2], initial_shape[3], initial_shape[4]) return img def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, control=None, transformer_options={}, **kwargs): diff --git a/comfy/model_base.py b/comfy/model_base.py index 98f462b32..0eeaed790 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -871,6 +871,15 @@ class HunyuanVideo(BaseModel): if cross_attn is not None: out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) + image = kwargs.get("concat_latent_image", None) + noise = kwargs.get("noise", None) + + if image is not None: + padding_shape = (noise.shape[0], 16, noise.shape[2] - 1, noise.shape[3], noise.shape[4]) + latent_padding = torch.zeros(padding_shape, device=noise.device, dtype=noise.dtype) + image_latents = torch.cat([image.to(noise), latent_padding], dim=2) + out['c_concat'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_latents)) + guidance = kwargs.get("guidance", 6.0) if guidance is not None: 
out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance])) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 2644dd0dc..5051f821d 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -136,7 +136,7 @@ def detect_unet_config(state_dict, key_prefix): if '{}txt_in.individual_token_refiner.blocks.0.norm1.weight'.format(key_prefix) in state_dict_keys: #Hunyuan Video dit_config = {} dit_config["image_model"] = "hunyuan_video" - dit_config["in_channels"] = 16 + dit_config["in_channels"] = state_dict['{}img_in.proj.weight'.format(key_prefix)].shape[1] #SkyReels img2video has 32 input channels dit_config["patch_size"] = [1, 2, 2] dit_config["out_channels"] = 16 dit_config["vec_in_dim"] = 768 From afc85cdeb64e1c758cd1d0fa8c99f0e3a9e9f9cd Mon Sep 17 00:00:00 2001 From: bymyself Date: Tue, 18 Feb 2025 15:53:01 -0700 Subject: [PATCH 2/2] Add Load Image Output node (#6790) * add LoadImageOutput node * add route for input/output/temp files * update node_typing.py * use literal type for image_folder field * mark node as beta --- api_server/routes/internal/internal_routes.py | 17 +++++++++- comfy/comfy_types/node_typing.py | 21 ++++++++++++ nodes.py | 32 +++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/api_server/routes/internal/internal_routes.py b/api_server/routes/internal/internal_routes.py index a66fe529b..613b0f7c7 100644 --- a/api_server/routes/internal/internal_routes.py +++ b/api_server/routes/internal/internal_routes.py @@ -1,8 +1,9 @@ from aiohttp import web from typing import Optional -from folder_paths import folder_names_and_paths +from folder_paths import folder_names_and_paths, get_directory_by_type from api_server.services.terminal_service import TerminalService import app.logger +import os class InternalRoutes: ''' @@ -50,6 +51,20 @@ class InternalRoutes: response[key] = folder_names_and_paths[key][0] return web.json_response(response) + @self.routes.get('/files/{directory_type}') + async def
get_files(request: web.Request) -> web.Response: + directory_type = request.match_info['directory_type'] + if directory_type not in ("output", "input", "temp"): + return web.json_response({"error": "Invalid directory type"}, status=400) + + directory = get_directory_by_type(directory_type) + sorted_files = sorted( + (entry for entry in os.scandir(directory) if entry.is_file()), + key=lambda entry: -entry.stat().st_mtime + ) + return web.json_response([entry.name for entry in sorted_files], status=200) + + def get_app(self): if self._app is None: self._app = web.Application() diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index 056b1aa65..0f70fdb23 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -66,6 +66,19 @@ class IO(StrEnum): b = frozenset(value.split(",")) return not (b.issubset(a) or a.issubset(b)) +class RemoteInputOptions(TypedDict): + route: str + """The route to the remote source.""" + refresh_button: bool + """Specifies whether to show a refresh button in the UI below the widget.""" + control_after_refresh: Literal["first", "last"] + """Specifies the control after the refresh button is clicked. If "first", the first item will be automatically selected, and so on.""" + timeout: int + """The maximum amount of time to wait for a response from the remote source in milliseconds.""" + max_retries: int + """The maximum number of retries before aborting the request.""" + refresh: int + """The TTL of the remote input's value in milliseconds. Specifies the interval at which the remote input's value is refreshed.""" class InputTypeOptions(TypedDict): """Provides type hinting for the return type of the INPUT_TYPES node function. 
@@ -113,6 +126,14 @@ class InputTypeOptions(TypedDict): # defaultVal: str dynamicPrompts: bool """Causes the front-end to evaluate dynamic prompts (``STRING``)""" + # class InputTypeCombo(InputTypeOptions): + image_upload: bool + """Specifies whether the input should have an image upload button and image preview attached to it. Requires that the input's name is `image`.""" + image_folder: Literal["input", "output", "temp"] + """Specifies which folder to get preview images from if the input has the ``image_upload`` flag. + """ + remote: RemoteInputOptions + """Specifies the configuration for a remote input.""" class HiddenInputTypeDict(TypedDict): diff --git a/nodes.py b/nodes.py index 504a3376e..b39adc654 100644 --- a/nodes.py +++ b/nodes.py @@ -1763,6 +1763,36 @@ class LoadImageMask: return True + +class LoadImageOutput(LoadImage): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("COMBO", { + "image_upload": True, + "image_folder": "output", + "remote": { + "route": "/internal/files/output", + "refresh_button": True, + "control_after_refresh": "first", + }, + }), + } + } + + DESCRIPTION = "Load an image from the output folder. When the refresh button is clicked, the node will update the image list and automatically select the first image, allowing for easy iteration." 
+ EXPERIMENTAL = True + FUNCTION = "load_image_output" + + def load_image_output(self, image): + return self.load_image(f"{image} [output]") + + @classmethod + def VALIDATE_INPUTS(s, image): + return True + + class ImageScale: upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "lanczos"] crop_methods = ["disabled", "center"] @@ -1949,6 +1979,7 @@ NODE_CLASS_MAPPINGS = { "PreviewImage": PreviewImage, "LoadImage": LoadImage, "LoadImageMask": LoadImageMask, + "LoadImageOutput": LoadImageOutput, "ImageScale": ImageScale, "ImageScaleBy": ImageScaleBy, "ImageInvert": ImageInvert, @@ -2049,6 +2080,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "PreviewImage": "Preview Image", "LoadImage": "Load Image", "LoadImageMask": "Load Image (as Mask)", + "LoadImageOutput": "Load Image (from Outputs)", "ImageScale": "Upscale Image", "ImageScaleBy": "Upscale Image By", "ImageUpscaleWithModel": "Upscale Image (using Model)",