Merge branch 'master' of https://github.com/siliconflow/ComfyUI into refine_offload

strint 2025-10-22 21:15:28 +08:00
commit f3c673d086
13 changed files with 140 additions and 69 deletions

View File

@@ -197,7 +197,7 @@ comfy install

 ## Manual Install (Windows, Linux)

-Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (breaks the canny node) and install pytorch nightly but it is not recommended.
+Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (breaks the canny node) but it is not recommended.

 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12

View File

@@ -189,15 +189,15 @@ class ChromaRadiance(Chroma):
         nerf_pixels = nn.functional.unfold(img_orig, kernel_size=patch_size, stride=patch_size)
         nerf_pixels = nerf_pixels.transpose(1, 2)  # -> [B, NumPatches, C * P * P]

+        # Reshape for per-patch processing
+        nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
+        nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
+
         if params.nerf_tile_size > 0 and num_patches > params.nerf_tile_size:
             # Enable tiling if nerf_tile_size isn't 0 and we actually have more patches than
             # the tile size.
-            img_dct = self.forward_tiled_nerf(img_out, nerf_pixels, B, C, num_patches, patch_size, params)
+            img_dct = self.forward_tiled_nerf(nerf_hidden, nerf_pixels, B, C, num_patches, patch_size, params)
         else:
-            # Reshape for per-patch processing
-            nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
-            nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
-
             # Get DCT-encoded pixel embeddings [pixel-dct]
             img_dct = self.nerf_image_embedder(nerf_pixels)
@@ -240,17 +240,8 @@ class ChromaRadiance(Chroma):
             end = min(i + tile_size, num_patches)

             # Slice the current tile from the input tensors
-            nerf_hidden_tile = nerf_hidden[:, i:end, :]
-            nerf_pixels_tile = nerf_pixels[:, i:end, :]
-
-            # Get the actual number of patches in this tile (can be smaller for the last tile)
-            num_patches_tile = nerf_hidden_tile.shape[1]
-
-            # Reshape the tile for per-patch processing
-            # [B, NumPatches_tile, D] -> [B * NumPatches_tile, D]
-            nerf_hidden_tile = nerf_hidden_tile.reshape(batch * num_patches_tile, params.hidden_size)
-            # [B, NumPatches_tile, C*P*P] -> [B*NumPatches_tile, C, P*P] -> [B*NumPatches_tile, P*P, C]
-            nerf_pixels_tile = nerf_pixels_tile.reshape(batch * num_patches_tile, channels, patch_size**2).transpose(1, 2)
+            nerf_hidden_tile = nerf_hidden[i * batch:end * batch]
+            nerf_pixels_tile = nerf_pixels[i * batch:end * batch]

             # get DCT-encoded pixel embeddings [pixel-dct]
             img_dct_tile = self.nerf_image_embedder(nerf_pixels_tile)
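
The unfold/transpose/reshape chain in this file is what turns an image into one row per patch before the NeRF embedder runs. A quick shape check with toy sizes (illustrative only, not the model's real dimensions) shows the layout the new code assumes:

```python
# Toy shape check for the patch flattening above; sizes are illustrative only.
import torch
import torch.nn.functional as F

B, C, H, W, P = 2, 3, 8, 8, 4
img = torch.randn(B, C, H, W)

patches = F.unfold(img, kernel_size=P, stride=P)  # [B, C*P*P, NumPatches] = [2, 48, 4]
patches = patches.transpose(1, 2)                 # [B, NumPatches, C*P*P]
num_patches = patches.shape[1]

# Same reshape as the new code: one row per patch, pixels along dim 1, channels last.
patches = patches.reshape(B * num_patches, C, P**2).transpose(1, 2)
print(patches.shape)  # torch.Size([8, 16, 3]) -> [B*NumPatches, P*P, C]
```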

View File

@@ -213,7 +213,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["nerf_mlp_ratio"] = 4
         dit_config["nerf_depth"] = 4
         dit_config["nerf_max_freqs"] = 8
-        dit_config["nerf_tile_size"] = 32
+        dit_config["nerf_tile_size"] = 512
         dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
         dit_config["nerf_embedder_dtype"] = torch.float32
     else:

View File

@@ -330,14 +330,21 @@ except:
 SUPPORT_FP8_OPS = args.supports_fp8_compute

+AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]
+
 try:
     if is_amd():
-        torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
+        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
+        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
+            torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
+            logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")
         try:
             rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
         except:
             rocm_version = (6, -1)
-        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
         logging.info("AMD arch: {}".format(arch))
         logging.info("ROCm version: {}".format(rocm_version))
         if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
@@ -371,6 +378,9 @@ try:
 except:
     pass

+if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
+    torch.backends.cudnn.benchmark = True
+
 try:
     if torch_version_numeric >= (2, 5):
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
@@ -1358,7 +1368,7 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
     if is_amd():
         arch = torch.cuda.get_device_properties(device).gcnArchName
-        if any((a in arch) for a in ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]): # RDNA2 and older don't support bf16
+        if any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH): # RDNA2 and older don't support bf16
             if manual_cast:
                 return True
             return False
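
The new `AMD_RDNA2_AND_OLDER_ARCH` list is matched by substring against the ROCm `gcnArchName` string, which typically carries extra feature flags. A small standalone sketch of that check (the arch strings below are just examples, not taken from this diff):

```python
# Standalone sketch of the substring arch check used above; example strings only.
AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]

def is_rdna2_or_older(arch_name: str) -> bool:
    # gcnArchName often looks like "gfx1030:sramecc-:xnack-", so substring matching is used.
    return any(a in arch_name for a in AMD_RDNA2_AND_OLDER_ARCH)

print(is_rdna2_or_older("gfx1030:sramecc-:xnack-"))  # True  -> cudnn stays enabled, bf16 avoided
print(is_rdna2_or_older("gfx1100"))                  # False -> RDNA3 or newer
```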

View File

@@ -25,6 +25,9 @@ import comfy.rmsnorm
 import contextlib

 def run_every_op():
+    if torch.compiler.is_compiling():
+        return
     comfy.model_management.throw_exception_if_processing_interrupted()

 def scaled_dot_product_attention(q, k, v, *args, **kwargs):
@@ -52,14 +55,22 @@ try:
 except (ModuleNotFoundError, TypeError):
     logging.warning("Could not set sdpa backend priority.")

-cast_to = comfy.model_management.cast_to #TODO: remove once no more references
+NVIDIA_MEMORY_CONV_BUG_WORKAROUND = False
+try:
+    if comfy.model_management.is_nvidia():
+        if torch.backends.cudnn.version() >= 91002 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10):
+            #TODO: change upper bound version once it's fixed'
+            NVIDIA_MEMORY_CONV_BUG_WORKAROUND = True
+            logging.info("working around nvidia conv3d memory bug.")
+except:
+    pass

-if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
-    torch.backends.cudnn.benchmark = True
+cast_to = comfy.model_management.cast_to #TODO: remove once no more references

 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)

+@torch.compiler.disable()
 def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
     if input is not None:
         if dtype is None:
@@ -151,6 +162,15 @@ class disable_weight_init:
         def reset_parameters(self):
             return None

+        def _conv_forward(self, input, weight, bias, *args, **kwargs):
+            if NVIDIA_MEMORY_CONV_BUG_WORKAROUND and weight.dtype in (torch.float16, torch.bfloat16):
+                out = torch.cudnn_convolution(input, weight, self.padding, self.stride, self.dilation, self.groups, benchmark=False, deterministic=False, allow_tf32=True)
+                if bias is not None:
+                    out += bias.reshape((1, -1) + (1,) * (out.ndim - 2))
+                return out
+            else:
+                return super()._conv_forward(input, weight, bias, *args, **kwargs)
+
         def forward_comfy_cast_weights(self, input):
             weight, bias = cast_bias_weight(self, input)
             return self._conv_forward(input, weight, bias)
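
In the `_conv_forward` workaround above, the bias is added manually after the low-level convolution call; the reshape just broadcasts a per-channel bias over every other dimension of the output. A minimal sketch with assumed shapes:

```python
# Minimal sketch of the bias broadcast used in the workaround; shapes are assumed.
import torch

out = torch.zeros(2, 8, 4, 16, 16)                    # e.g. a conv3d output [N, C_out, D, H, W]
bias = torch.arange(8, dtype=torch.float32)           # one value per output channel
out += bias.reshape((1, -1) + (1,) * (out.ndim - 2))  # -> [1, 8, 1, 1, 1], broadcast over N, D, H, W
print(out[0, 3].unique())                             # tensor([3.])
```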

View File

@@ -393,7 +393,9 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
                 ),
                 IO.Combo.Input(
                     "model",
-                    options=list(MODELS_MAP.keys()),
+                    options=[
+                        "veo-3.1-generate", "veo-3.1-fast-generate", "veo-3.0-generate-001", "veo-3.0-fast-generate-001"
+                    ],
                     default="veo-3.0-generate-001",
                     tooltip="Veo 3 model to use for video generation",
                     optional=True,

View File

@@ -1,20 +1,26 @@
 from comfy.cldm.control_types import UNION_CONTROLNET_TYPES
 import nodes
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io

-class SetUnionControlNetType:
+class SetUnionControlNetType(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"control_net": ("CONTROL_NET", ),
-                             "type": (["auto"] + list(UNION_CONTROLNET_TYPES.keys()),)
-                             }}
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SetUnionControlNetType",
+            category="conditioning/controlnet",
+            inputs=[
+                io.ControlNet.Input("control_net"),
+                io.Combo.Input("type", options=["auto"] + list(UNION_CONTROLNET_TYPES.keys())),
+            ],
+            outputs=[
+                io.ControlNet.Output(),
+            ],
+        )

-    CATEGORY = "conditioning/controlnet"
-    RETURN_TYPES = ("CONTROL_NET",)
-    FUNCTION = "set_controlnet_type"
-
-    def set_controlnet_type(self, control_net, type):
+    @classmethod
+    def execute(cls, control_net, type) -> io.NodeOutput:
         control_net = control_net.copy()
         type_number = UNION_CONTROLNET_TYPES.get(type, -1)
         if type_number >= 0:
@@ -22,27 +28,36 @@ class SetUnionControlNetType:
         else:
             control_net.set_extra_arg("control_type", [])

-        return (control_net,)
+        return io.NodeOutput(control_net)
+
+    set_controlnet_type = execute # TODO: remove


-class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
+class ControlNetInpaintingAliMamaApply(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "control_net": ("CONTROL_NET", ),
-                             "vae": ("VAE", ),
-                             "image": ("IMAGE", ),
-                             "mask": ("MASK", ),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
-                             "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
-                             "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
-                             }}
-
-    FUNCTION = "apply_inpaint_controlnet"
-
-    CATEGORY = "conditioning/controlnet"
-
-    def apply_inpaint_controlnet(self, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ControlNetInpaintingAliMamaApply",
+            category="conditioning/controlnet",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.ControlNet.Input("control_net"),
+                io.Vae.Input("vae"),
+                io.Image.Input("image"),
+                io.Mask.Input("mask"),
+                io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01),
+                io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent) -> io.NodeOutput:
         extra_concat = []
         if control_net.concat_mask:
             mask = 1.0 - mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1]))
@@ -50,11 +65,20 @@ class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
             image = image * mask_apply.movedim(1, -1).repeat(1, 1, 1, image.shape[3])
             extra_concat = [mask]

-        return self.apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
+        result = nodes.ControlNetApplyAdvanced().apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
+        return io.NodeOutput(result[0], result[1])
+
+    apply_inpaint_controlnet = execute # TODO: remove
+
+
+class ControlNetExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            SetUnionControlNetType,
+            ControlNetInpaintingAliMamaApply,
+        ]

-NODE_CLASS_MAPPINGS = {
-    "SetUnionControlNetType": SetUnionControlNetType,
-    "ControlNetInpaintingAliMamaApply": ControlNetInpaintingAliMamaApply,
-}
+
+async def comfy_entrypoint() -> ControlNetExtension:
+    return ControlNetExtension()

View File

@@ -244,6 +244,8 @@ class EasyCacheHolder:
         self.total_steps_skipped += 1
         batch_offset = x.shape[0] // len(uuids)
         for i, uuid in enumerate(uuids):
+            # slice out only what is relevant to this cond
+            batch_slice = [slice(i*batch_offset,(i+1)*batch_offset)]
             # if cached dims don't match x dims, cut off excess and hope for the best (cosmos world2video)
             if x.shape[1:] != self.uuid_cache_diffs[uuid].shape[1:]:
                 if not self.allow_mismatch:
@@ -261,9 +263,8 @@ class EasyCacheHolder:
                         slicing.append(slice(None, dim_u))
                     else:
                         slicing.append(slice(None))
-                slicing = [slice(i*batch_offset,(i+1)*batch_offset)] + slicing
-                x = x[slicing]
-            x += self.uuid_cache_diffs[uuid].to(x.device)
+                batch_slice = batch_slice + slicing
+            x[batch_slice] += self.uuid_cache_diffs[uuid].to(x.device)
         return x

     def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID]):
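
The switch from `x = x[slicing]` to `x[batch_slice] += ...` matters because the old form replaced the whole tensor with one cond's slice inside the loop, while the new form adds each cached diff in place to its own batch slice and leaves the rest of `x` intact for the remaining conds. A toy standalone illustration (shapes and values invented):

```python
# Toy illustration of per-cond in-place slice updates; values are invented.
import torch

x = torch.zeros(4, 2, 3)                       # two conds stacked along the batch dim
diffs = [torch.ones(2, 2, 3), 2 * torch.ones(2, 2, 3)]
batch_offset = x.shape[0] // len(diffs)

for i, diff in enumerate(diffs):
    # in-place add touches only this cond's rows; x keeps its full shape for the next cond
    x[i * batch_offset:(i + 1) * batch_offset] += diff

print(x[0, 0, 0].item(), x[3, 0, 0].item())    # 1.0 2.0
```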

View File

@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.65"
+__version__ = "0.3.66"

View File

@@ -1,6 +1,6 @@
 import os
 import importlib.util
-from comfy.cli_args import args
+from comfy.cli_args import args, PerformanceFeature
 import subprocess

 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
@@ -75,8 +75,9 @@ if not args.cuda_malloc:
                 spec.loader.exec_module(module)
                 version = module.__version__

-        if int(version[0]) >= 2 and "+cu" in version: #enable by default for torch version 2.0 and up only on cuda torch
-            args.cuda_malloc = cuda_malloc_supported()
+        if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch
+            if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc
+                args.cuda_malloc = cuda_malloc_supported()
     except:
         pass

View File

@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.65"
+version = "0.3.66"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

View File

@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.28.7
-comfyui-workflow-templates==0.1.95
+comfyui-workflow-templates==0.2.1
 comfyui-embedded-docs==0.3.0
 torch
 torchsde

View File

@@ -48,6 +48,28 @@ async def send_socket_catch_exception(function, message):
     except (aiohttp.ClientError, aiohttp.ClientPayloadError, ConnectionResetError, BrokenPipeError, ConnectionError) as err:
         logging.warning("send error: {}".format(err))

+# Track deprecated paths that have been warned about to only warn once per file
+_deprecated_paths_warned = set()
+
+@web.middleware
+async def deprecation_warning(request: web.Request, handler):
+    """Middleware to warn about deprecated frontend API paths"""
+    path = request.path
+
+    if path.startswith("/scripts/ui") or path.startswith("/extensions/core/"):
+        # Only warn once per unique file path
+        if path not in _deprecated_paths_warned:
+            _deprecated_paths_warned.add(path)
+            logging.warning(
+                f"[DEPRECATION WARNING] Detected import of deprecated legacy API: {path}. "
+                f"This is likely caused by a custom node extension using outdated APIs. "
+                f"Please update your extensions or contact the extension author for an updated version."
+            )
+
+    response: web.Response = await handler(request)
+    return response
+
 @web.middleware
 async def compress_body(request: web.Request, handler):
     accept_encoding = request.headers.get("Accept-Encoding", "")
@@ -159,7 +181,7 @@ class PromptServer():
         self.client_session:Optional[aiohttp.ClientSession] = None
         self.number = 0

-        middlewares = [cache_control]
+        middlewares = [cache_control, deprecation_warning]
         if args.enable_compress_response_body:
             middlewares.append(compress_body)
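
For reference, this is roughly how an aiohttp middleware like `deprecation_warning` plugs into an application. The snippet below is a self-contained toy, not ComfyUI's actual server wiring; the handler name and route are made up:

```python
# Self-contained toy showing aiohttp middleware registration; not ComfyUI's setup.
import logging
from aiohttp import web

@web.middleware
async def deprecation_warning(request: web.Request, handler):
    # Same idea as the diff above: warn when a deprecated frontend path is requested.
    if request.path.startswith("/scripts/ui") or request.path.startswith("/extensions/core/"):
        logging.warning("deprecated frontend path requested: %s", request.path)
    return await handler(request)

async def index(request: web.Request) -> web.Response:
    return web.Response(text="ok")

app = web.Application(middlewares=[deprecation_warning])
app.add_routes([web.get("/", index)])
# web.run_app(app)  # start the server; middlewares run on every request in list order
```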