Merge upstream/master, keep local README.md

commit 9bb5b71d6d
GitHub Actions, 2025-10-22 00:35:30 +00:00
15 changed files with 249 additions and 82 deletions


@@ -189,15 +189,15 @@ class ChromaRadiance(Chroma):
         nerf_pixels = nn.functional.unfold(img_orig, kernel_size=patch_size, stride=patch_size)
         nerf_pixels = nerf_pixels.transpose(1, 2) # -> [B, NumPatches, C * P * P]

+        # Reshape for per-patch processing
+        nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
+        nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
+
         if params.nerf_tile_size > 0 and num_patches > params.nerf_tile_size:
             # Enable tiling if nerf_tile_size isn't 0 and we actually have more patches than
             # the tile size.
-            img_dct = self.forward_tiled_nerf(img_out, nerf_pixels, B, C, num_patches, patch_size, params)
+            img_dct = self.forward_tiled_nerf(nerf_hidden, nerf_pixels, B, C, num_patches, patch_size, params)
         else:
-            # Reshape for per-patch processing
-            nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
-            nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
             # Get DCT-encoded pixel embeddings [pixel-dct]
             img_dct = self.nerf_image_embedder(nerf_pixels)
@@ -240,17 +240,8 @@ class ChromaRadiance(Chroma):
             end = min(i + tile_size, num_patches)
             # Slice the current tile from the input tensors
-            nerf_hidden_tile = nerf_hidden[:, i:end, :]
-            nerf_pixels_tile = nerf_pixels[:, i:end, :]
-
-            # Get the actual number of patches in this tile (can be smaller for the last tile)
-            num_patches_tile = nerf_hidden_tile.shape[1]
-
-            # Reshape the tile for per-patch processing
-            # [B, NumPatches_tile, D] -> [B * NumPatches_tile, D]
-            nerf_hidden_tile = nerf_hidden_tile.reshape(batch * num_patches_tile, params.hidden_size)
-            # [B, NumPatches_tile, C*P*P] -> [B*NumPatches_tile, C, P*P] -> [B*NumPatches_tile, P*P, C]
-            nerf_pixels_tile = nerf_pixels_tile.reshape(batch * num_patches_tile, channels, patch_size**2).transpose(1, 2)
+            nerf_hidden_tile = nerf_hidden[i * batch:end * batch]
+            nerf_pixels_tile = nerf_pixels[i * batch:end * batch]

             # get DCT-encoded pixel embeddings [pixel-dct]
             img_dct_tile = self.nerf_image_embedder(nerf_pixels_tile)
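Note: forward_tiled_nerf now receives tensors already flattened to [B * NumPatches, ...], so each tile is a contiguous chunk of rows rather than a per-batch patch slice plus reshape. Assuming the embedder treats each patch row independently (as the per-patch comments suggest), chunking the flattened rows produces the same result as one full pass; a minimal sketch with hypothetical shapes:

import torch

B, N, D, tile = 2, 10, 4, 3
rows = torch.randn(B * N, D)  # stand-in for the pre-flattened nerf_hidden

def per_row_op(t):  # stand-in for the per-patch embedder
    return t * 2 + 1

full = per_row_op(rows)
tiled = torch.cat([per_row_op(rows[i:i + tile]) for i in range(0, B * N, tile)])
assert torch.equal(full, tiled)  # row chunking does not change the result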


@@ -213,7 +213,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["nerf_mlp_ratio"] = 4
         dit_config["nerf_depth"] = 4
         dit_config["nerf_max_freqs"] = 8
-        dit_config["nerf_tile_size"] = 32
+        dit_config["nerf_tile_size"] = 512
         dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
         dit_config["nerf_embedder_dtype"] = torch.float32
     else:


@@ -330,14 +330,21 @@ except:
 SUPPORT_FP8_OPS = args.supports_fp8_compute

+AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]
+
 try:
     if is_amd():
-        torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
+        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
+        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
+            torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
+            logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")
         try:
             rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
         except:
             rocm_version = (6, -1)
-        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
         logging.info("AMD arch: {}".format(arch))
         logging.info("ROCm version: {}".format(rocm_version))
         if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
@@ -349,7 +356,7 @@ try:
             if any((a in arch) for a in ["gfx1201"]):
                 ENABLE_PYTORCH_ATTENTION = True
         if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
-            if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx942", "gfx950"]): # TODO: more arches
+            if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx950"]): # TODO: more arches, "gfx942" gives error on pytorch nightly 2.10 1013 rocm7.0
                 SUPPORT_FP8_OPS = True
 except:
@@ -371,6 +378,9 @@ try:
 except:
     pass

+if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
+    torch.backends.cudnn.benchmark = True
+
 try:
     if torch_version_numeric >= (2, 5):
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
@@ -1327,7 +1337,7 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, manual_cast=False):
     if is_amd():
         arch = torch.cuda.get_device_properties(device).gcnArchName
-        if any((a in arch) for a in ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]): # RDNA2 and older don't support bf16
+        if any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH): # RDNA2 and older don't support bf16
             if manual_cast:
                 return True
             return False
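Note: the arch checks use substring matching because, to my understanding, gcnArchName can carry feature suffixes appended to the bare arch name; a tiny sketch with a hypothetical arch string:

AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]

arch = "gfx1030:sramecc+:xnack-"  # hypothetical gcnArchName with feature suffixes
assert any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)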


@@ -25,6 +25,9 @@ import comfy.rmsnorm
 import contextlib

 def run_every_op():
+    if torch.compiler.is_compiling():
+        return
+
     comfy.model_management.throw_exception_if_processing_interrupted()

 def scaled_dot_product_attention(q, k, v, *args, **kwargs):
@@ -52,14 +55,22 @@ try:
 except (ModuleNotFoundError, TypeError):
     logging.warning("Could not set sdpa backend priority.")

-cast_to = comfy.model_management.cast_to #TODO: remove once no more references
+NVIDIA_MEMORY_CONV_BUG_WORKAROUND = False
+try:
+    if comfy.model_management.is_nvidia():
+        if torch.backends.cudnn.version() >= 91002 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10):
+            #TODO: change upper bound version once it's fixed'
+            NVIDIA_MEMORY_CONV_BUG_WORKAROUND = True
+            logging.info("working around nvidia conv3d memory bug.")
+except:
+    pass

-if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
-    torch.backends.cudnn.benchmark = True
+cast_to = comfy.model_management.cast_to #TODO: remove once no more references

 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)

 @torch.compiler.disable()
 def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
     if input is not None:
         if dtype is None:
@@ -151,6 +162,15 @@ class disable_weight_init:
         def reset_parameters(self):
             return None

+        def _conv_forward(self, input, weight, bias, *args, **kwargs):
+            if NVIDIA_MEMORY_CONV_BUG_WORKAROUND and weight.dtype in (torch.float16, torch.bfloat16):
+                out = torch.cudnn_convolution(input, weight, self.padding, self.stride, self.dilation, self.groups, benchmark=False, deterministic=False, allow_tf32=True)
+                if bias is not None:
+                    out += bias.reshape((1, -1) + (1,) * (out.ndim - 2))
+                return out
+            else:
+                return super()._conv_forward(input, weight, bias, *args, **kwargs)
+
         def forward_comfy_cast_weights(self, input):
             weight, bias = cast_bias_weight(self, input)
             return self._conv_forward(input, weight, bias)
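Note: the workaround path calls the low-level cudnn convolution without bias and applies the bias by hand; the reshape broadcasts the [C_out] bias vector across batch and spatial dims. A minimal check that this manual broadcast matches the regular biased conv (torch.cudnn_convolution itself needs a CUDA device, so this compares against F.conv3d instead):

import torch
import torch.nn.functional as F

x = torch.randn(1, 2, 4, 4, 4)
w = torch.randn(3, 2, 3, 3, 3)
b = torch.randn(3)

out = F.conv3d(x, w)  # convolution without bias
out = out + b.reshape((1, -1) + (1,) * (out.ndim - 2))  # manual bias broadcast
assert torch.allclose(out, F.conv3d(x, w, b), atol=1e-5)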


@@ -150,7 +150,7 @@ def merge_nested_dicts(dict1: dict, dict2: dict, copy_dict1=True):
     for key, value in dict2.items():
         if isinstance(value, dict):
             curr_value = merged_dict.setdefault(key, {})
-            merged_dict[key] = merge_nested_dicts(value, curr_value)
+            merged_dict[key] = merge_nested_dicts(curr_value, value)
         elif isinstance(value, list):
             merged_dict.setdefault(key, []).extend(value)
         else:
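Note: this one-line fix matters because the recursion previously passed dict2's nested value as the first argument. At the top level dict2 wins for scalar keys, but the swapped recursion made dict1 win inside nested dicts. A behavioral sketch with a simplified stand-in for merge_nested_dicts:

def merge(d1, d2):  # simplified stand-in; d2 is meant to take precedence
    out = dict(d1)
    for k, v in d2.items():
        if isinstance(v, dict):
            out[k] = merge(out.get(k, {}), v)  # fixed argument order
        else:
            out[k] = v
    return out

base = {"opts": {"depth": 1, "name": "a"}}
override = {"opts": {"depth": 2}}
assert merge(base, override) == {"opts": {"depth": 2, "name": "a"}}
# With the old swapped order, merge(value, curr_value), the nested override
# lost: the result would have kept depth == 1.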


@@ -306,17 +306,10 @@ def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor
                                              copy_dict1=False)

         if patches is not None:
-            # TODO: replace with merge_nested_dicts function
-            if "patches" in transformer_options:
-                cur_patches = transformer_options["patches"].copy()
-                for p in patches:
-                    if p in cur_patches:
-                        cur_patches[p] = cur_patches[p] + patches[p]
-                    else:
-                        cur_patches[p] = patches[p]
-                transformer_options["patches"] = cur_patches
-            else:
-                transformer_options["patches"] = patches
+            transformer_options["patches"] = comfy.patcher_extension.merge_nested_dicts(
+                transformer_options.get("patches", {}),
+                patches
+            )

         transformer_options["cond_or_uncond"] = cond_or_uncond[:]
         transformer_options["uuids"] = uuids[:]
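Note: this fulfills the removed TODO, and it relies on merge_nested_dicts extending list values, which reproduces the old inline loop's cur_patches[p] + patches[p] concatenation. A behavioral sketch (simplified stand-in, not the comfy.patcher_extension import):

def merge_patches(d1, d2):  # simplified stand-in for merge_nested_dicts
    out = {k: list(v) for k, v in d1.items()}
    for k, v in d2.items():
        out.setdefault(k, []).extend(v)
    return out

cur = {"attn1_patch": ["p1"]}
new = {"attn1_patch": ["p2"], "mid_patch": ["p3"]}
assert merge_patches(cur, new) == {"attn1_patch": ["p1", "p2"], "mid_patch": ["p3"]}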


@@ -27,6 +27,13 @@ from comfy_api_nodes.apinode_utils import (
 )

 AVERAGE_DURATION_VIDEO_GEN = 32

+MODELS_MAP = {
+    "veo-2.0-generate-001": "veo-2.0-generate-001",
+    "veo-3.1-generate": "veo-3.1-generate-preview",
+    "veo-3.1-fast-generate": "veo-3.1-fast-generate-preview",
+    "veo-3.0-generate-001": "veo-3.0-generate-001",
+    "veo-3.0-fast-generate-001": "veo-3.0-fast-generate-001",
+}

 def convert_image_to_base64(image: torch.Tensor):
     if image is None:
@@ -158,6 +165,7 @@ class VeoVideoGenerationNode(IO.ComfyNode):
         model="veo-2.0-generate-001",
         generate_audio=False,
     ):
+        model = MODELS_MAP[model]
         # Prepare the instances for the request
         instances = []
@@ -385,7 +393,9 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
             ),
             IO.Combo.Input(
                 "model",
-                options=["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
+                options=[
+                    "veo-3.1-generate", "veo-3.1-fast-generate", "veo-3.0-generate-001", "veo-3.0-fast-generate-001"
+                ],
                 default="veo-3.0-generate-001",
                 tooltip="Veo 3 model to use for video generation",
                 optional=True,


@@ -1,20 +1,26 @@
 from comfy.cldm.control_types import UNION_CONTROLNET_TYPES
 import nodes
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io

-class SetUnionControlNetType:
+class SetUnionControlNetType(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"control_net": ("CONTROL_NET", ),
-                             "type": (["auto"] + list(UNION_CONTROLNET_TYPES.keys()),)
-                             }}
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SetUnionControlNetType",
+            category="conditioning/controlnet",
+            inputs=[
+                io.ControlNet.Input("control_net"),
+                io.Combo.Input("type", options=["auto"] + list(UNION_CONTROLNET_TYPES.keys())),
+            ],
+            outputs=[
+                io.ControlNet.Output(),
+            ],
+        )

-    CATEGORY = "conditioning/controlnet"
-    RETURN_TYPES = ("CONTROL_NET",)
-    FUNCTION = "set_controlnet_type"
-
-    def set_controlnet_type(self, control_net, type):
+    @classmethod
+    def execute(cls, control_net, type) -> io.NodeOutput:
         control_net = control_net.copy()
         type_number = UNION_CONTROLNET_TYPES.get(type, -1)
         if type_number >= 0:
@@ -22,27 +28,36 @@ class SetUnionControlNetType:
         else:
             control_net.set_extra_arg("control_type", [])

-        return (control_net,)
+        return io.NodeOutput(control_net)

-class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
+    set_controlnet_type = execute # TODO: remove
+
+class ControlNetInpaintingAliMamaApply(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "control_net": ("CONTROL_NET", ),
-                             "vae": ("VAE", ),
-                             "image": ("IMAGE", ),
-                             "mask": ("MASK", ),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
-                             "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
-                             "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
-                             }}
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ControlNetInpaintingAliMamaApply",
+            category="conditioning/controlnet",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.ControlNet.Input("control_net"),
+                io.Vae.Input("vae"),
+                io.Image.Input("image"),
+                io.Mask.Input("mask"),
+                io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01),
+                io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+            ],
+        )

-    FUNCTION = "apply_inpaint_controlnet"
-    CATEGORY = "conditioning/controlnet"
-
-    def apply_inpaint_controlnet(self, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent):
+    @classmethod
+    def execute(cls, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent) -> io.NodeOutput:
         extra_concat = []
         if control_net.concat_mask:
             mask = 1.0 - mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1]))
@@ -50,11 +65,20 @@ class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
             image = image * mask_apply.movedim(1, -1).repeat(1, 1, 1, image.shape[3])
             extra_concat = [mask]

-        return self.apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
+        result = nodes.ControlNetApplyAdvanced().apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
+        return io.NodeOutput(result[0], result[1])
+
+    apply_inpaint_controlnet = execute # TODO: remove
+
+class ControlNetExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            SetUnionControlNetType,
+            ControlNetInpaintingAliMamaApply,
+        ]

-NODE_CLASS_MAPPINGS = {
-    "SetUnionControlNetType": SetUnionControlNetType,
-    "ControlNetInpaintingAliMamaApply": ControlNetInpaintingAliMamaApply,
-}
+
+async def comfy_entrypoint() -> ControlNetExtension:
+    return ControlNetExtension()


@@ -244,6 +244,8 @@ class EasyCacheHolder:
         self.total_steps_skipped += 1
         batch_offset = x.shape[0] // len(uuids)
         for i, uuid in enumerate(uuids):
+            # slice out only what is relevant to this cond
+            batch_slice = [slice(i*batch_offset,(i+1)*batch_offset)]
             # if cached dims don't match x dims, cut off excess and hope for the best (cosmos world2video)
             if x.shape[1:] != self.uuid_cache_diffs[uuid].shape[1:]:
                 if not self.allow_mismatch:
@@ -261,9 +263,8 @@ class EasyCacheHolder:
                         slicing.append(slice(None, dim_u))
                     else:
                         slicing.append(slice(None))
-                slicing = [slice(i*batch_offset,(i+1)*batch_offset)] + slicing
-                x = x[slicing]
-            x += self.uuid_cache_diffs[uuid].to(x.device)
+                batch_slice = batch_slice + slicing
+            x[batch_slice] += self.uuid_cache_diffs[uuid].to(x.device)
         return x

     def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID]):
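Note: the fix scopes each cached diff to its own cond's rows. The old code rebound x to a slice inside the loop (x = x[slicing]) and then added the diff to whatever x had become, which went wrong with multiple uuids; now each diff is added in place to just its batch slice. A minimal sketch with hypothetical shapes:

import torch

x = torch.zeros(4, 3)  # two conds stacked, so batch_offset = 2
uuid_cache_diffs = [torch.ones(2, 3), torch.full((2, 3), 2.0)]

batch_offset = x.shape[0] // len(uuid_cache_diffs)
for i, diff in enumerate(uuid_cache_diffs):
    x[i * batch_offset:(i + 1) * batch_offset] += diff  # only this cond's rows
assert x[0, 0] == 1.0 and x[2, 0] == 2.0

The real code builds the index as a list so that extra per-dimension slices can be appended in the shape-mismatch branch.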


@@ -1,5 +1,7 @@
 import torch

 from typing_extensions import override
+
+from comfy.k_diffusion.sampling import sigma_to_half_log_snr
 from comfy_api.latest import ComfyExtension, io
@@ -63,12 +65,105 @@ class EpsilonScaling(io.ComfyNode):
         return io.NodeOutput(model_clone)

+def compute_tsr_rescaling_factor(
+    snr: torch.Tensor, tsr_k: float, tsr_variance: float
+) -> torch.Tensor:
+    """Compute the rescaling score ratio in Temporal Score Rescaling.
+
+    See equation (6) in https://arxiv.org/pdf/2510.01184v1.
+    """
+    posinf_mask = torch.isposinf(snr)
+    rescaling_factor = (snr * tsr_variance + 1) / (snr * tsr_variance / tsr_k + 1)
+    return torch.where(posinf_mask, tsr_k, rescaling_factor) # when snr → inf, r = tsr_k
+
+class TemporalScoreRescaling(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="TemporalScoreRescaling",
+            display_name="TSR - Temporal Score Rescaling",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input(
+                    "tsr_k",
+                    tooltip=(
+                        "Controls the rescaling strength.\n"
+                        "Lower k produces more detailed results; higher k produces smoother results in image generation. Setting k = 1 disables rescaling."
+                    ),
+                    default=0.95,
+                    min=0.01,
+                    max=100.0,
+                    step=0.001,
+                    display_mode=io.NumberDisplay.number,
+                ),
+                io.Float.Input(
+                    "tsr_sigma",
+                    tooltip=(
+                        "Controls how early rescaling takes effect.\n"
+                        "Larger values take effect earlier."
+                    ),
+                    default=1.0,
+                    min=0.01,
+                    max=100.0,
+                    step=0.001,
+                    display_mode=io.NumberDisplay.number,
+                ),
+            ],
+            outputs=[
+                io.Model.Output(
+                    display_name="patched_model",
+                ),
+            ],
+            description=(
+                "[Post-CFG Function]\n"
+                "TSR - Temporal Score Rescaling (2510.01184)\n\n"
+                "Rescaling the model's score or noise to steer the sampling diversity.\n"
+            ),
+        )

+    @classmethod
+    def execute(cls, model, tsr_k, tsr_sigma) -> io.NodeOutput:
+        tsr_variance = tsr_sigma**2
+
+        def temporal_score_rescaling(args):
+            denoised = args["denoised"]
+            x = args["input"]
+            sigma = args["sigma"]
+            curr_model = args["model"]
+
+            # No rescaling (r = 1) or no noise
+            if tsr_k == 1 or sigma == 0:
+                return denoised
+
+            model_sampling = curr_model.current_patcher.get_model_object("model_sampling")
+            half_log_snr = sigma_to_half_log_snr(sigma, model_sampling)
+            snr = (2 * half_log_snr).exp()
+
+            # No rescaling needed (r = 1)
+            if snr == 0:
+                return denoised
+
+            rescaling_r = compute_tsr_rescaling_factor(snr, tsr_k, tsr_variance)
+
+            # Derived from scaled_denoised = (x - r * sigma * noise) / alpha
+            alpha = sigma * half_log_snr.exp()
+            return torch.lerp(x / alpha, denoised, rescaling_r)
+
+        m = model.clone()
+        m.set_model_sampler_post_cfg_function(temporal_score_rescaling)
+        return io.NodeOutput(m)
+
 class EpsilonScalingExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[io.ComfyNode]]:
         return [
             EpsilonScaling,
+            TemporalScoreRescaling,
         ]

 async def comfy_entrypoint() -> EpsilonScalingExtension:
     return EpsilonScalingExtension()
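Note: the torch.lerp in execute is an algebraic shortcut. With denoised = (x - sigma * noise) / alpha, rescaling the predicted noise by r and re-deriving the prediction gives (x - r * sigma * noise) / alpha = (1 - r) * (x / alpha) + r * denoised, which is exactly torch.lerp(x / alpha, denoised, r). A quick numeric check with arbitrary scalar stand-ins:

x, sigma, alpha, noise, r = 1.7, 0.8, 0.6, 0.3, 0.9
denoised = (x - sigma * noise) / alpha
rescaled = (x - r * sigma * noise) / alpha
lerped = (1 - r) * (x / alpha) + r * denoised  # torch.lerp(x / alpha, denoised, r)
assert abs(rescaled - lerped) < 1e-12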


@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.65"
+__version__ = "0.3.66"


@@ -1,6 +1,6 @@
 import os
 import importlib.util
-from comfy.cli_args import args
+from comfy.cli_args import args, PerformanceFeature
 import subprocess

 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
@@ -75,8 +75,9 @@ if not args.cuda_malloc:
         spec.loader.exec_module(module)
         version = module.__version__

-        if int(version[0]) >= 2 and "+cu" in version: #enable by default for torch version 2.0 and up only on cuda torch
-            args.cuda_malloc = cuda_malloc_supported()
+        if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch
+            if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc
+                args.cuda_malloc = cuda_malloc_supported()
     except:
         pass


@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.65"
+version = "0.3.66"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"


@@ -1,5 +1,5 @@
-comfyui-frontend-package==1.27.10
-comfyui-workflow-templates==0.1.95
+comfyui-frontend-package==1.28.7
+comfyui-workflow-templates==0.2.1
 comfyui-embedded-docs==0.3.0
 torch
 torchsde


@@ -48,6 +48,28 @@ async def send_socket_catch_exception(function, message):
     except (aiohttp.ClientError, aiohttp.ClientPayloadError, ConnectionResetError, BrokenPipeError, ConnectionError) as err:
         logging.warning("send error: {}".format(err))

+# Track deprecated paths that have been warned about to only warn once per file
+_deprecated_paths_warned = set()
+
+@web.middleware
+async def deprecation_warning(request: web.Request, handler):
+    """Middleware to warn about deprecated frontend API paths"""
+    path = request.path
+
+    if path.startswith("/scripts/ui") or path.startswith("/extensions/core/"):
+        # Only warn once per unique file path
+        if path not in _deprecated_paths_warned:
+            _deprecated_paths_warned.add(path)
+            logging.warning(
+                f"[DEPRECATION WARNING] Detected import of deprecated legacy API: {path}. "
+                f"This is likely caused by a custom node extension using outdated APIs. "
+                f"Please update your extensions or contact the extension author for an updated version."
+            )
+
+    response: web.Response = await handler(request)
+    return response
+
 @web.middleware
 async def compress_body(request: web.Request, handler):
     accept_encoding = request.headers.get("Accept-Encoding", "")
@@ -159,7 +181,7 @@ class PromptServer():
         self.client_session:Optional[aiohttp.ClientSession] = None
         self.number = 0

-        middlewares = [cache_control]
+        middlewares = [cache_control, deprecation_warning]
         if args.enable_compress_response_body:
             middlewares.append(compress_body)