Mirror of https://github.com/comfyanonymous/ComfyUI.git, synced 2026-01-09 22:00:49 +08:00

Compare commits: 19 commits, d66e5dd864...801b9bf9db
Commits in this range (newest first, by SHA1):

801b9bf9db, 3cd7b32f1b, c0c9720d77, fc0cb10bcb, b7d7cc1d49, 79e94544bd,
ce0000c4f2, c5cfb34c07, edee33f55e, 2c03884f5f, 6e9ee55cdd, 023cf13721,
c3566c0d76, c3c3e93c5b, 6ffc159bdd, 96e0d0924e, a5acc660b2, 5f10b70d9c,
afe05d68ee

Windows standalone launcher scripts — the same echo-line fix is applied to three .bat variants:

@@ -1,3 +1,3 @@
 ..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
 pause

@@ -1,3 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
 pause

@@ -1,3 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
 pause

.github/workflows/stable-release.yml (vendored, 2 lines changed):

@@ -117,7 +117,7 @@ jobs:
         ./python.exe get-pip.py
         ./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*

-        grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
+        grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
         ./python.exe -s -m pip install -r requirements_comfyui.txt
         rm requirements_comfyui.txt
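
Widening `grep comfyui` to `grep comfy` means the standalone build step now reinstalls every comfy-prefixed pin from requirements.txt, not just the `comfyui-*` packages. A quick sketch of what each substring pattern selects, using pins that appear in this release's requirements.txt:

# Substring match, same as the grep patterns in the workflow step above.
reqs = ["comfyui-frontend-package==1.35.9", "comfy-kitchen>=0.2.3", "torch"]
print([r for r in reqs if "comfyui" in r])  # old: ['comfyui-frontend-package==1.35.9']
print([r for r in reqs if "comfy" in r])    # new: also picks up 'comfy-kitchen>=0.2.3'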

.github/workflows/test-ci.yml (vendored, 2 lines changed) — matrix jobs are now forced to run one at a time:

@@ -20,6 +20,7 @@ jobs:
   test-stable:
     strategy:
       fail-fast: false
+      max-parallel: 1 # This forces sequential execution
       matrix:
         # os: [macos, linux, windows]
         # os: [macos, linux]
@@ -74,6 +75,7 @@ jobs:
   test-unix-nightly:
     strategy:
       fail-fast: false
+      max-parallel: 1 # This forces sequential execution
       matrix:
         # os: [macos, linux]
         os: [linux]

comfy/latent_formats.py — LTXAV no longer inherits the LTXV RGB preview factors:

@@ -408,7 +408,9 @@ class LTXV(LatentFormat):
         self.latent_rgb_factors_bias = [-0.0571, -0.1657, -0.2512]

 class LTXAV(LTXV):
-    pass
+    def __init__(self):
+        self.latent_rgb_factors = None
+        self.latent_rgb_factors_bias = None

 class HunyuanVideo(LatentFormat):
     latent_channels = 16
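
For context, `latent_rgb_factors` is the per-channel projection that latent previewers use to map latents to RGB; nulling it in `LTXAV.__init__` opts the audio-video format out of that fast preview path. A hedged sketch of the kind of projection the factors drive (the channel count and factor table here are illustrative stand-ins, not the real LTXV values):

import torch

# Illustrative only: project a latent (B, C, H, W) to RGB with a (C, 3) factor
# table plus the bias shown in the diff above.
latent = torch.randn(1, 128, 8, 8)                # assumed LTXV-like channel count
factors = torch.randn(128, 3)                     # stand-in for the real table
bias = torch.tensor([-0.0571, -0.1657, -0.2512])
rgb = torch.einsum("bchw,cr->brhw", latent, factors) + bias.view(1, 3, 1, 1)
print(rgb.shape)  # torch.Size([1, 3, 8, 8])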

comfy/ldm/flux/math.py — apply_rope now prefers comfy_kitchen kernels, falling back to the pure-PyTorch implementation:

@@ -4,6 +4,7 @@ from torch import Tensor

 from comfy.ldm.modules.attention import optimized_attention
 import comfy.model_management
+import logging


 def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
@@ -13,7 +14,6 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
     x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask, transformer_options=transformer_options)
     return x
-

 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
     assert dim % 2 == 0
     if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
@@ -28,13 +28,20 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
     out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
     return out.to(dtype=torch.float32, device=pos.device)

-def apply_rope1(x: Tensor, freqs_cis: Tensor):
-    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
-
-    x_out = freqs_cis[..., 0] * x_[..., 0]
-    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
-
-    return x_out.reshape(*x.shape).type_as(x)
-
-def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-    return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+try:
+    import comfy.quant_ops
+    apply_rope = comfy.quant_ops.ck.apply_rope
+    apply_rope1 = comfy.quant_ops.ck.apply_rope1
+except:
+    logging.warning("No comfy kitchen, using old apply_rope functions.")
+
+    def apply_rope1(x: Tensor, freqs_cis: Tensor):
+        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
+
+        x_out = freqs_cis[..., 0] * x_[..., 0]
+        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+
+        return x_out.reshape(*x.shape).type_as(x)
+
+    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
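
The fallback `apply_rope1` treats `freqs_cis` as a stack of per-position 2x2 rotation matrices and rotates each (even, odd) feature pair. A self-contained check that the pairwise multiply-accumulate really equals a complex rotation (shapes are simplified here, not the exact flux layout):

import torch

def apply_rope1(x, freqs_cis):
    # Same body as the fallback above.
    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
    x_out = freqs_cis[..., 0] * x_[..., 0]
    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
    return x_out.reshape(*x.shape).type_as(x)

n, half = 3, 2                          # 3 positions, feature dim 4 split into pairs
theta = torch.rand(n, half)
cos, sin = torch.cos(theta), torch.sin(theta)
# rotation matrix [[cos, -sin], [sin, cos]] per (position, pair)
freqs_cis = torch.stack([torch.stack([cos, -sin], -1),
                         torch.stack([sin,  cos], -1)], -2)  # (n, half, 2, 2)
x = torch.randn(n, 2 * half)
out = apply_rope1(x, freqs_cis)

# reference: rotate each (even, odd) pair as a complex number e^{i*theta} * z
z = torch.view_as_complex(x.reshape(n, half, 2))
ref = torch.view_as_real(torch.polar(torch.ones_like(theta), theta) * z).reshape(n, 2 * half)
print(torch.allclose(out, ref, atol=1e-6))  # True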

LTXAV embeddings connector (Embeddings1DConnector) — the learnable registers are now moved to the device/dtype of hidden_states before tiling:

@@ -276,7 +276,7 @@ class Embeddings1DConnector(nn.Module):
             max(1024, hidden_states.shape[1]) / self.num_learnable_registers
         )
         learnable_registers = torch.tile(
-            self.learnable_registers, (num_registers_duplications, 1)
+            self.learnable_registers.to(hidden_states), (num_registers_duplications, 1)
         )

         hidden_states = torch.cat((hidden_states, learnable_registers[hidden_states.shape[1]:].unsqueeze(0).repeat(hidden_states.shape[0], 1, 1)), dim=1)
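
`Tensor.to(other)` with a tensor argument converts to `other`'s dtype and device in one call, which is what keeps the registers compatible with `hidden_states` when the module runs offloaded or in reduced precision:

import torch

a = torch.zeros(2, dtype=torch.float32)  # e.g. parameters kept in fp32
b = torch.zeros(2, dtype=torch.float16)  # e.g. fp16 activations
print(a.to(b).dtype)                     # torch.float16 — dtype (and device) follow b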

comfy/model_base.py — state dicts are now loaded with assign=True when mmap is in use, so tensor storage is reused instead of copied:

@@ -306,7 +306,10 @@ class BaseModel(torch.nn.Module):
                 to_load[k[len(unet_prefix):]] = sd.pop(k)

         to_load = self.model_config.process_unet_state_dict(to_load)
-        m, u = self.diffusion_model.load_state_dict(to_load, strict=False)
+        # assign=True will reuse the tensor storage in the state dict; this avoids a copy,
+        # saving CPU memory when loading large models with mmap.
+        delay_copy_with_assign = utils.MMAP_TORCH_FILES or not utils.DISABLE_MMAP
+        m, u = self.diffusion_model.load_state_dict(to_load, strict=False, assign=delay_copy_with_assign)
         if len(m) > 0:
             logging.warning("unet missing: {}".format(m))
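
`load_state_dict(assign=True)` (PyTorch >= 2.1) swaps the module's parameters for the state-dict tensors themselves rather than copying into preallocated storage, so an mmap-backed checkpoint is only materialized page by page as it is touched. A minimal demonstration of the storage reuse:

import torch

lin = torch.nn.Linear(4, 4)
sd = {k: v.clone() for k, v in lin.state_dict().items()}

lin.load_state_dict(sd, assign=True)
# With assign=True the parameter shares storage with the state-dict tensor;
# the default (assign=False) would copy into the module's old storage instead.
print(lin.weight.data_ptr() == sd["weight"].data_ptr())  # True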

comfy/model_management.py — new capability probe for NVFP4:

@@ -1504,6 +1504,16 @@ def supports_fp8_compute(device=None):

     return True

+def supports_nvfp4_compute(device=None):
+    if not is_nvidia():
+        return False
+
+    props = torch.cuda.get_device_properties(device)
+    if props.major < 10:
+        return False
+
+    return True
+
 def extended_fp16_support():
     # TODO: check why some models work with fp16 on newer torch versions but not on older
     if torch_version_numeric < (2, 7):
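
The `props.major < 10` test keys off CUDA compute capability: Ada reports 8.9, Hopper 9.0, and Blackwell-class parts report major 10 or above, which is what NVFP4 kernels need here. A quick way to see what a device reports:

import torch

if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    print(props.name, f"sm_{props.major}{props.minor}")
    print("nvfp4-capable by this check:", props.major >= 10)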

comfy/ops.py (25 lines changed) — QuantizedTensor layouts are now referenced by name, and mixed_precision_ops learns which quant formats the device cannot run:

@@ -427,12 +427,12 @@ def fp8_linear(self, input):
     input = torch.clamp(input, min=-448, max=448, out=input)
     input_fp8 = input.to(dtype).contiguous()
     layout_params_input = TensorCoreFP8Layout.Params(scale=scale_input, orig_dtype=input_dtype, orig_shape=tuple(input_fp8.shape))
-    quantized_input = QuantizedTensor(input_fp8, TensorCoreFP8Layout, layout_params_input)
+    quantized_input = QuantizedTensor(input_fp8, "TensorCoreFP8Layout", layout_params_input)

     # Wrap weight in QuantizedTensor - this enables unified dispatch
     # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
     layout_params_weight = TensorCoreFP8Layout.Params(scale=scale_weight, orig_dtype=input_dtype, orig_shape=tuple(w.shape))
-    quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
+    quantized_weight = QuantizedTensor(w, "TensorCoreFP8Layout", layout_params_weight)
     o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)

     uncast_bias_weight(self, w, bias, offload_stream)
@@ -493,11 +493,12 @@ from .quant_ops import (
 )


-def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False):
+def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False, disabled=[]):
     class MixedPrecisionOps(manual_cast):
         _quant_config = quant_config
         _compute_dtype = compute_dtype
         _full_precision_mm = full_precision_mm
+        _disabled = disabled

         class Linear(torch.nn.Module, CastWeightBiasOp):
             def __init__(
@@ -522,6 +523,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

                 self.tensor_class = None
                 self._full_precision_mm = MixedPrecisionOps._full_precision_mm
+                self._full_precision_mm_config = False

             def reset_parameters(self):
                 return None
@@ -556,8 +558,12 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                     self.weight = torch.nn.Parameter(weight.to(device=device, dtype=MixedPrecisionOps._compute_dtype), requires_grad=False)
                 else:
                     self.quant_format = layer_conf.get("format", None)
+                    self._full_precision_mm_config = layer_conf.get("full_precision_matrix_mult", False)
                     if not self._full_precision_mm:
-                        self._full_precision_mm = layer_conf.get("full_precision_matrix_mult", False)
+                        self._full_precision_mm = self._full_precision_mm_config

+                    if self.quant_format in MixedPrecisionOps._disabled:
+                        self._full_precision_mm = True
+
                     if self.quant_format is None:
                         raise ValueError(f"Unknown quantization format for layer {layer_name}")
@@ -630,7 +636,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                     sd["{}weight_scale".format(prefix)] = self.weight._params.block_scale

                 quant_conf = {"format": self.quant_format}
-                if self._full_precision_mm:
+                if self._full_precision_mm_config:
                     quant_conf["full_precision_matrix_mult"] = True
                 sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)
                 return sd
@@ -711,10 +717,17 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
     fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
+    nvfp4_compute = comfy.model_management.supports_nvfp4_compute(load_device)

     if model_config and hasattr(model_config, 'quant_config') and model_config.quant_config:
         logging.info("Using mixed precision operations")
-        return mixed_precision_ops(model_config.quant_config, compute_dtype, full_precision_mm=not fp8_compute)
+        disabled = set()
+        if not nvfp4_compute:
+            disabled.add("nvfp4")
+        if not fp8_compute:
+            disabled.add("float8_e4m3fn")
+            disabled.add("float8_e5m2")
+        return mixed_precision_ops(model_config.quant_config, compute_dtype, disabled=disabled)

     if (
         fp8_compute and
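
The net effect of the `pick_operations` change: instead of a single `full_precision_mm=not fp8_compute` switch, each quant format the device cannot execute is collected into `disabled`, and layers using a disabled format fall back to full-precision matmul individually. The selection logic, isolated:

def select_disabled(fp8_compute: bool, nvfp4_compute: bool) -> set:
    # Mirrors the new branch in pick_operations above.
    disabled = set()
    if not nvfp4_compute:
        disabled.add("nvfp4")
    if not fp8_compute:
        disabled.add("float8_e4m3fn")
        disabled.add("float8_e5m2")
    return disabled

print(select_disabled(fp8_compute=True, nvfp4_compute=False))  # {'nvfp4'}: fp8 layers stay quantized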

comfy/quant_ops.py — the comfy_kitchen CUDA backend is disabled on toolkits older than CUDA 13:

@@ -13,6 +13,13 @@ try:
         get_layout_class,
     )
     _CK_AVAILABLE = True
+    if torch.version.cuda is None:
+        ck.registry.disable("cuda")
+    else:
+        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
+        if cuda_version < (13,):
+            ck.registry.disable("cuda")
+
     ck.registry.disable("triton")
     for k, v in ck.list_backends().items():
         logging.info(f"Found comfy_kitchen backend {k}: {v}")
|||||||
@ -36,10 +36,10 @@ class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):
|
|||||||
|
|
||||||
class Gemma3_12BModel(sd1_clip.SDClipModel):
|
class Gemma3_12BModel(sd1_clip.SDClipModel):
|
||||||
def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
|
def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
|
||||||
llama_scaled_fp8 = model_options.get("gemma_scaled_fp8", None)
|
llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
|
||||||
if llama_scaled_fp8 is not None:
|
if llama_quantization_metadata is not None:
|
||||||
model_options = model_options.copy()
|
model_options = model_options.copy()
|
||||||
model_options["scaled_fp8"] = llama_scaled_fp8
|
model_options["quantization_metadata"] = llama_quantization_metadata
|
||||||
|
|
||||||
super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
|
super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
|
||||||
|
|
||||||
@ -86,17 +86,19 @@ class LTXAVTEModel(torch.nn.Module):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def set_clip_options(self, options):
|
def set_clip_options(self, options):
|
||||||
|
self.execution_device = options.get("execution_device", self.execution_device)
|
||||||
self.gemma3_12b.set_clip_options(options)
|
self.gemma3_12b.set_clip_options(options)
|
||||||
|
|
||||||
def reset_clip_options(self):
|
def reset_clip_options(self):
|
||||||
self.gemma3_12b.reset_clip_options()
|
self.gemma3_12b.reset_clip_options()
|
||||||
|
self.execution_device = None
|
||||||
|
|
||||||
def encode_token_weights(self, token_weight_pairs):
|
def encode_token_weights(self, token_weight_pairs):
|
||||||
token_weight_pairs = token_weight_pairs["gemma3_12b"]
|
token_weight_pairs = token_weight_pairs["gemma3_12b"]
|
||||||
|
|
||||||
out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
|
out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
|
||||||
out_device = out.device
|
out_device = out.device
|
||||||
out = out.movedim(1, -1).to(self.text_embedding_projection.weight.device)
|
out = out.movedim(1, -1).to(self.execution_device)
|
||||||
out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
|
out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
|
||||||
out = out.reshape((out.shape[0], out.shape[1], -1))
|
out = out.reshape((out.shape[0], out.shape[1], -1))
|
||||||
out = self.text_embedding_projection(out)
|
out = self.text_embedding_projection(out)
|
||||||
@ -117,12 +119,12 @@ class LTXAVTEModel(torch.nn.Module):
|
|||||||
return self.load_state_dict(sdo, strict=False)
|
return self.load_state_dict(sdo, strict=False)
|
||||||
|
|
||||||
|
|
||||||
def ltxav_te(dtype_llama=None, llama_scaled_fp8=None):
|
def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
|
||||||
class LTXAVTEModel_(LTXAVTEModel):
|
class LTXAVTEModel_(LTXAVTEModel):
|
||||||
def __init__(self, device="cpu", dtype=None, model_options={}):
|
def __init__(self, device="cpu", dtype=None, model_options={}):
|
||||||
if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options:
|
if llama_quantization_metadata is not None:
|
||||||
model_options = model_options.copy()
|
model_options = model_options.copy()
|
||||||
model_options["llama_scaled_fp8"] = llama_scaled_fp8
|
model_options["llama_quantization_metadata"] = llama_quantization_metadata
|
||||||
if dtype_llama is not None:
|
if dtype_llama is not None:
|
||||||
dtype = dtype_llama
|
dtype = dtype_llama
|
||||||
super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
|
super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
|
||||||
|
|||||||
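
The `execution_device` change decouples where activations are sent from where the projection weights currently live (which may be an offload device at encode time). A minimal sketch of the pattern, with names taken from the diff:

class ClipOptionsDeviceTracking:
    # Hedged sketch: remember the execution device from set_clip_options and
    # clear it on reset, instead of inferring it from a (possibly offloaded)
    # weight's .device when encoding.
    def __init__(self):
        self.execution_device = None

    def set_clip_options(self, options: dict):
        self.execution_device = options.get("execution_device", self.execution_device)

    def reset_clip_options(self):
        self.execution_device = None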

comfy_api_nodes/nodes_wan.py — adds the WanReferenceVideoApi node (Wan 2.6 reference-to-video):

@@ -13,7 +13,9 @@ from comfy_api_nodes.util import (
     poll_op,
     sync_op,
     tensor_to_base64_string,
+    upload_video_to_comfyapi,
     validate_audio_duration,
+    validate_video_duration,
 )


@@ -41,6 +43,12 @@ class Image2VideoInputField(BaseModel):
     audio_url: str | None = Field(None)


+class Reference2VideoInputField(BaseModel):
+    prompt: str = Field(...)
+    negative_prompt: str | None = Field(None)
+    reference_video_urls: list[str] = Field(...)
+
+
 class Txt2ImageParametersField(BaseModel):
     size: str = Field(...)
     n: int = Field(1, description="Number of images to generate.")  # we support only value=1
@@ -76,6 +84,14 @@ class Image2VideoParametersField(BaseModel):
     shot_type: str = Field("single")


+class Reference2VideoParametersField(BaseModel):
+    size: str = Field(...)
+    duration: int = Field(5, ge=5, le=15)
+    shot_type: str = Field("single")
+    seed: int = Field(..., ge=0, le=2147483647)
+    watermark: bool = Field(False)
+
+
 class Text2ImageTaskCreationRequest(BaseModel):
     model: str = Field(...)
     input: Text2ImageInputField = Field(...)
@@ -100,6 +116,12 @@ class Image2VideoTaskCreationRequest(BaseModel):
     parameters: Image2VideoParametersField = Field(...)


+class Reference2VideoTaskCreationRequest(BaseModel):
+    model: str = Field(...)
+    input: Reference2VideoInputField = Field(...)
+    parameters: Reference2VideoParametersField = Field(...)
+
+
 class TaskCreationOutputField(BaseModel):
     task_id: str = Field(...)
     task_status: str = Field(...)
@@ -721,6 +743,143 @@ class WanImageToVideoApi(IO.ComfyNode):
         return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))


+class WanReferenceVideoApi(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="WanReferenceVideoApi",
+            display_name="Wan Reference to Video",
+            category="api node/video/Wan",
+            description="Use the character and voice from input videos, combined with a prompt, "
+            "to generate a new video that maintains character consistency.",
+            inputs=[
+                IO.Combo.Input("model", options=["wan2.6-r2v"]),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese. "
+                    "Use identifiers such as `character1` and `character2` to refer to the reference characters.",
+                ),
+                IO.String.Input(
+                    "negative_prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Negative prompt describing what to avoid.",
+                ),
+                IO.Autogrow.Input(
+                    "reference_videos",
+                    template=IO.Autogrow.TemplateNames(
+                        IO.Video.Input("reference_video"),
+                        names=["character1", "character2", "character3"],
+                        min=1,
+                    ),
+                ),
+                IO.Combo.Input(
+                    "size",
+                    options=[
+                        "720p: 1:1 (960x960)",
+                        "720p: 16:9 (1280x720)",
+                        "720p: 9:16 (720x1280)",
+                        "720p: 4:3 (1088x832)",
+                        "720p: 3:4 (832x1088)",
+                        "1080p: 1:1 (1440x1440)",
+                        "1080p: 16:9 (1920x1080)",
+                        "1080p: 9:16 (1080x1920)",
+                        "1080p: 4:3 (1632x1248)",
+                        "1080p: 3:4 (1248x1632)",
+                    ],
+                ),
+                IO.Int.Input(
+                    "duration",
+                    default=5,
+                    min=5,
+                    max=10,
+                    step=5,
+                    display_mode=IO.NumberDisplay.slider,
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    step=1,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                ),
+                IO.Combo.Input(
+                    "shot_type",
+                    options=["single", "multi"],
+                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
+                    "single continuous shot or multiple shots with cuts.",
+                ),
+                IO.Boolean.Input(
+                    "watermark",
+                    default=False,
+                    tooltip="Whether to add an AI-generated watermark to the result.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        model: str,
+        prompt: str,
+        negative_prompt: str,
+        reference_videos: IO.Autogrow.Type,
+        size: str,
+        duration: int,
+        seed: int,
+        shot_type: str,
+        watermark: bool,
+    ):
+        reference_video_urls = []
+        for i in reference_videos:
+            validate_video_duration(reference_videos[i], min_duration=2, max_duration=30)
+        for i in reference_videos:
+            reference_video_urls.append(await upload_video_to_comfyapi(cls, reference_videos[i]))
+        width, height = RES_IN_PARENS.search(size).groups()
+        initial_response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", method="POST"),
+            response_model=TaskCreationResponse,
+            data=Reference2VideoTaskCreationRequest(
+                model=model,
+                input=Reference2VideoInputField(
+                    prompt=prompt, negative_prompt=negative_prompt, reference_video_urls=reference_video_urls
+                ),
+                parameters=Reference2VideoParametersField(
+                    size=f"{width}*{height}",
+                    duration=duration,
+                    shot_type=shot_type,
+                    watermark=watermark,
+                    seed=seed,
+                ),
+            ),
+        )
+        if not initial_response.output:
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
+            response_model=VideoTaskStatusResponse,
+            status_extractor=lambda x: x.output.task_status,
+            poll_interval=6,
+            max_poll_attempts=280,
+        )
+        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
+
+
 class WanApiExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -729,6 +888,7 @@ class WanApiExtension(ComfyExtension):
             WanImageToImageApi,
             WanTextToVideoApi,
             WanImageToVideoApi,
+            WanReferenceVideoApi,
         ]
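
`execute` pulls the pixel dimensions out of the selected size label with `RES_IN_PARENS`, which is defined elsewhere in nodes_wan.py; a pattern with the behavior the call site implies (the exact regex is an assumption):

import re

RES_IN_PARENS = re.compile(r"\((\d+)x(\d+)\)")  # assumed, not the verbatim definition
width, height = RES_IN_PARENS.search("720p: 16:9 (1280x720)").groups()
print(f"{width}*{height}")                       # "1280*720", the form sent to the Wan API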

Video upload helper (upload_video_to_comfyapi) — uploaded files get a unique name instead of a fixed one:

@@ -119,7 +119,7 @@ async def upload_video_to_comfyapi(
         raise ValueError(f"Could not verify video duration from source: {e}") from e

     upload_mime_type = f"video/{container.value.lower()}"
-    filename = f"uploaded_video.{container.value.lower()}"
+    filename = f"{uuid.uuid4()}.{container.value.lower()}"

     # Convert VideoInput to BytesIO using specified container/codec
     video_bytes_io = BytesIO()
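
Naming uploads with `uuid4()` instead of the fixed `uploaded_video.<ext>` makes every upload key unique, so repeated or concurrent uploads can no longer collide:

import uuid

print(f"{uuid.uuid4()}.mp4")  # different on every call
print(f"{uuid.uuid4()}.mp4")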

comfyui_version.py:

@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.7.0"
+__version__ = "0.8.0"

pyproject.toml:

@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.7.0"
+version = "0.8.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"

requirements.txt:

@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.66
+comfyui-workflow-templates==0.7.67
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.0
+comfy-kitchen>=0.2.3

 #non essential dependencies:
 kornia>=0.7.1