Merge branch 'master' into dr-support-pip-cm

Dr.Lt.Data 2025-10-05 07:36:33 +09:00
commit 2dc24f9870
6 changed files with 589 additions and 585 deletions

View File

@ -206,14 +206,32 @@ Put your SD checkpoints (the huge ckpt/safetensors files) in: models/checkpoints
Put your VAE in: models/vae
### AMD GPUs (Linux only)
### AMD GPUs (Linux)
AMD users can install ROCm and PyTorch with pip if they are not already installed. This is the command to install the stable version:
```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```
This is the command to install the nightly with ROCm 6.4, which might have some performance improvements:
This is the command to install the nightly with ROCm 7.0, which might have some performance improvements:
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.4```
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.0```
### AMD GPUs (Experimental: Windows and Linux, RDNA 3, 3.5 and 4 only)
These have less hardware support than the builds above, but they work on Windows. You also need to install the PyTorch version specific to your hardware.
RDNA 3 (RX 7000 series):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx110X-dgpu/```
RDNA 3.5 (Strix Halo/Ryzen AI Max+ 365):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx1151/```
RDNA 4 (RX 9000 series):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx120X-all/```
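Whichever of these wheels you install, a quick way to confirm that the installed PyTorch is actually a ROCm build and can see your GPU is the short check below (a minimal sketch, not part of the install instructions):
```
import torch

# On a ROCm build, torch.version.hip is a version string; on CUDA/CPU-only builds it is None.
print("HIP version:", torch.version.hip)

# ROCm GPUs are exposed through the regular torch.cuda API.
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
```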
### Intel GPUs (Windows and Linux)
@ -270,12 +288,6 @@ You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS ve
> **Note**: Remember to add your models, VAE, LoRAs etc. to the corresponding Comfy folders, as discussed in [ComfyUI manual installation](#manual-install-windows-linux).
#### DirectML (AMD Cards on Windows)
This is very poorly supported and is not recommended. There are unofficial builds of PyTorch ROCm for Windows that will give you a much better experience than this. This README will be updated once official PyTorch ROCm builds for Windows come out.
```pip install torch-directml``` Then you can launch ComfyUI with: ```python main.py --directml```
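If you do use DirectML despite the caveats above, a minimal check that the device is usable looks like this (a sketch assuming the torch-directml package installed by the command above):
```
import torch
import torch_directml

# torch-directml exposes the GPU as its own device object instead of "cuda".
dml = torch_directml.device()

# Run a trivial op on the DirectML device to confirm it works.
x = torch.ones(4).to(dml)
print((x * 2).cpu())
```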
#### Ascend NPUs
For models compatible with Ascend Extension for PyTorch (torch_npu). To get started, ensure your environment meets the prerequisites outlined on the [installation](https://ascend.github.io/docs/sources/ascend/quick_install.html) page. Here's a step-by-step guide tailored to your platform and installation method:
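Independent of that guide, a quick way to confirm that torch_npu is being picked up is the short check below (a minimal sketch, assuming the prerequisites from the linked installation page are already in place):
```
import torch
import torch_npu  # registers the "npu" device type with PyTorch

print("NPU available:", torch.npu.is_available())
if torch.npu.is_available():
    # Run a trivial op on the NPU to confirm the stack works end to end.
    x = torch.ones(4).to("npu")
    print((x * 2).cpu())
```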

View File

@ -39,6 +39,7 @@ from comfy_api_nodes.apinode_utils import (
    tensor_to_base64_string,
    bytesio_to_image_tensor,
)
from comfy_api.util import VideoContainer, VideoCodec
GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
@ -310,7 +311,7 @@ class GeminiNode(ComfyNodeABC):
        Returns:
            List of GeminiPart objects containing the encoded video.
        """
        from comfy_api.util import VideoContainer, VideoCodec
        base_64_string = video_to_base64_string(
            video_input,
            container_format=VideoContainer.MP4,

View File

@ -2,11 +2,7 @@ import logging
from typing import Any, Callable, Optional, TypeVar
import torch
from typing_extensions import override
from comfy_api_nodes.util.validation_utils import (
    get_image_dimensions,
    validate_image_dimensions,
)
from comfy_api_nodes.util.validation_utils import validate_image_dimensions
from comfy_api_nodes.apis import (
    MoonvalleyTextToVideoRequest,
@ -132,47 +128,6 @@ def validate_prompts(
    return True
def validate_input_media(width, height, with_frame_conditioning, num_frames_in=None):
    # inference validation
    # T = num_frames
    # in all cases, the following must be true: T divisible by 16 and H,W by 8. in addition...
    # with image conditioning: H*W must be divisible by 8192
    # without image conditioning: T divisible by 32
    if num_frames_in and not num_frames_in % 16 == 0:
        return False, ("The input video total frame count must be divisible by 16!")
    if height % 8 != 0 or width % 8 != 0:
        return False, (
            f"Height ({height}) and width ({width}) must be " "divisible by 8"
        )
    if with_frame_conditioning:
        if (height * width) % 8192 != 0:
            return False, (
                f"Height * width ({height * width}) must be "
                "divisible by 8192 for frame conditioning"
            )
    else:
        if num_frames_in and not num_frames_in % 32 == 0:
            return False, ("The input video total frame count must be divisible by 32!")
def validate_input_image(
    image: torch.Tensor, with_frame_conditioning: bool = False
) -> None:
    """
    Validates the input image adheres to the expectations of the API:
    - The image resolution should not be less than 300*300px
    - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
    """
    height, width = get_image_dimensions(image)
    validate_input_media(width, height, with_frame_conditioning)
    validate_image_dimensions(
        image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
    )
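For reference, the divisibility rules spelled out in the comments of the removed validate_input_media can be illustrated with a standalone sketch using hypothetical values:
```
# Hypothetical input: 1024x576, 64 frames, with frame conditioning.
width, height, num_frames, frame_conditioning = 1024, 576, 64, True

ok = width % 8 == 0 and height % 8 == 0       # H and W must be divisible by 8
ok = ok and num_frames % 16 == 0              # T must be divisible by 16
if frame_conditioning:
    ok = ok and (height * width) % 8192 == 0  # 1024 * 576 = 589824 = 72 * 8192
else:
    ok = ok and num_frames % 32 == 0          # without conditioning, T must be divisible by 32
print(ok)  # prints True for these values
```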
def validate_video_to_video_input(video: VideoInput) -> VideoInput:
"""
Validates and processes video input for Moonvalley Video-to-Video generation.
@ -499,7 +454,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
        seed: int,
        steps: int,
    ) -> comfy_io.NodeOutput:
        validate_input_image(image, True)
        validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH)
        validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        width_height = parse_width_height_from_res(resolution)

File diff suppressed because it is too large.

View File

@ -1,6 +1,8 @@
import torch
import nodes
import comfy.utils
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
def camera_embeddings(elevation, azimuth):
    elevation = torch.as_tensor([elevation])
@ -20,26 +22,31 @@ def camera_embeddings(elevation, azimuth):
    return embeddings
class StableZero123_Conditioning:
class StableZero123_Conditioning(io.ComfyNode):
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_vision": ("CLIP_VISION",),
                              "init_image": ("IMAGE",),
                              "vae": ("VAE",),
                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
                             }}
    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")
    def define_schema(cls):
        return io.Schema(
            node_id="StableZero123_Conditioning",
            category="conditioning/3d_models",
            inputs=[
                io.ClipVision.Input("clip_vision"),
                io.Image.Input("init_image"),
                io.Vae.Input("vae"),
                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("batch_size", default=1, min=1, max=4096),
                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
            ],
            outputs=[
                io.Conditioning.Output(display_name="positive"),
                io.Conditioning.Output(display_name="negative"),
                io.Latent.Output(display_name="latent")
            ]
        )
    FUNCTION = "encode"
    CATEGORY = "conditioning/3d_models"
    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
    @classmethod
    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth) -> io.NodeOutput:
        output = clip_vision.encode_image(init_image)
        pooled = output.image_embeds.unsqueeze(0)
        pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@ -51,30 +58,35 @@ class StableZero123_Conditioning:
        positive = [[cond, {"concat_latent_image": t}]]
        negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
        latent = torch.zeros([batch_size, 4, height // 8, width // 8])
        return (positive, negative, {"samples":latent})
        return io.NodeOutput(positive, negative, {"samples":latent})
class StableZero123_Conditioning_Batched:
class StableZero123_Conditioning_Batched(io.ComfyNode):
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_vision": ("CLIP_VISION",),
                              "init_image": ("IMAGE",),
                              "vae": ("VAE",),
                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
                              "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
                              "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
                             }}
    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")
    def define_schema(cls):
        return io.Schema(
            node_id="StableZero123_Conditioning_Batched",
            category="conditioning/3d_models",
            inputs=[
                io.ClipVision.Input("clip_vision"),
                io.Image.Input("init_image"),
                io.Vae.Input("vae"),
                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("batch_size", default=1, min=1, max=4096),
                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
                io.Float.Input("elevation_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
                io.Float.Input("azimuth_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
            ],
            outputs=[
                io.Conditioning.Output(display_name="positive"),
                io.Conditioning.Output(display_name="negative"),
                io.Latent.Output(display_name="latent")
            ]
        )
    FUNCTION = "encode"
    CATEGORY = "conditioning/3d_models"
    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment):
    @classmethod
    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment) -> io.NodeOutput:
        output = clip_vision.encode_image(init_image)
        pooled = output.image_embeds.unsqueeze(0)
        pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@ -93,27 +105,32 @@ class StableZero123_Conditioning_Batched:
        positive = [[cond, {"concat_latent_image": t}]]
        negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
        latent = torch.zeros([batch_size, 4, height // 8, width // 8])
        return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
        return io.NodeOutput(positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
class SV3D_Conditioning:
class SV3D_Conditioning(io.ComfyNode):
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_vision": ("CLIP_VISION",),
                              "init_image": ("IMAGE",),
                              "vae": ("VAE",),
                              "width": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
                              "height": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
                              "video_frames": ("INT", {"default": 21, "min": 1, "max": 4096}),
                              "elevation": ("FLOAT", {"default": 0.0, "min": -90.0, "max": 90.0, "step": 0.1, "round": False}),
                             }}
    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")
    def define_schema(cls):
        return io.Schema(
            node_id="SV3D_Conditioning",
            category="conditioning/3d_models",
            inputs=[
                io.ClipVision.Input("clip_vision"),
                io.Image.Input("init_image"),
                io.Vae.Input("vae"),
                io.Int.Input("width", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("height", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
                io.Int.Input("video_frames", default=21, min=1, max=4096),
                io.Float.Input("elevation", default=0.0, min=-90.0, max=90.0, step=0.1, round=False)
            ],
            outputs=[
                io.Conditioning.Output(display_name="positive"),
                io.Conditioning.Output(display_name="negative"),
                io.Latent.Output(display_name="latent")
            ]
        )
    FUNCTION = "encode"
    CATEGORY = "conditioning/3d_models"
    def encode(self, clip_vision, init_image, vae, width, height, video_frames, elevation):
    @classmethod
    def execute(cls, clip_vision, init_image, vae, width, height, video_frames, elevation) -> io.NodeOutput:
        output = clip_vision.encode_image(init_image)
        pooled = output.image_embeds.unsqueeze(0)
        pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@ -133,11 +150,17 @@ class SV3D_Conditioning:
        positive = [[pooled, {"concat_latent_image": t, "elevation": elevations, "azimuth": azimuths}]]
        negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t), "elevation": elevations, "azimuth": azimuths}]]
        latent = torch.zeros([video_frames, 4, height // 8, width // 8])
        return (positive, negative, {"samples":latent})
        return io.NodeOutput(positive, negative, {"samples":latent})
NODE_CLASS_MAPPINGS = {
    "StableZero123_Conditioning": StableZero123_Conditioning,
    "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched,
    "SV3D_Conditioning": SV3D_Conditioning,
}
class Stable3DExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [
            StableZero123_Conditioning,
            StableZero123_Conditioning_Batched,
            SV3D_Conditioning,
        ]
async def comfy_entrypoint() -> Stable3DExtension:
    return Stable3DExtension()

View File

@ -70,7 +70,5 @@ messages_control.disable = [
"invalid-overridden-method",
"unused-variable",
"pointless-string-statement",
"inconsistent-return-statements",
"import-outside-toplevel",
"redefined-outer-name",
]