Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-04-19 06:52:31 +08:00)

Commit 7db5d45147: Merge upstream/master, keep local README.md
@@ -3,10 +3,13 @@ https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOW
 
 HOW TO RUN:
 
-if you have a AMD gpu:
+If you have a AMD gpu:
 
 run_amd_gpu.bat
 
+If you have memory issues you can try disabling the smart memory management by running comfyui with:
+
+run_amd_gpu_disable_smart_memory.bat
 
 IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints
 
2  .ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat  (new executable file)
@@ -0,0 +1,2 @@
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
+pause
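Note: --disable-smart-memory is an existing ComfyUI launch flag; the new .bat only forwards it to the embedded Python. On a manual (non-standalone) install the equivalent would presumably be:

    python main.py --disable-smart-memory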
25  .github/workflows/ruff.yml  (vendored)
@@ -21,3 +21,28 @@ jobs:
 
     - name: Run Ruff
       run: ruff check .
+
+  pylint:
+    name: Run Pylint
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.12'
+
+    - name: Install requirements
+      run: |
+        python -m pip install --upgrade pip
+        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+        pip install -r requirements.txt
+
+    - name: Install Pylint
+      run: pip install pylint
+
+    - name: Run Pylint
+      run: pylint comfy_api_nodes
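Note: the new CI job can be reproduced locally with the same commands the workflow runs (the CPU-only torch index keeps the install light, since no GPU is needed for linting):

    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
    pip install -r requirements.txt
    pip install pylint
    pylint comfy_api_nodes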
@@ -23,8 +23,6 @@ class MusicDCAE(torch.nn.Module):
         else:
             self.source_sample_rate = source_sample_rate
 
-        # self.resampler = torchaudio.transforms.Resample(source_sample_rate, 44100)
-
         self.transform = transforms.Compose([
             transforms.Normalize(0.5, 0.5),
         ])
@@ -37,10 +35,6 @@ class MusicDCAE(torch.nn.Module):
         self.scale_factor = 0.1786
         self.shift_factor = -1.9091
 
-    def load_audio(self, audio_path):
-        audio, sr = torchaudio.load(audio_path)
-        return audio, sr
-
     def forward_mel(self, audios):
         mels = []
         for i in range(len(audios)):
@@ -73,10 +67,8 @@ class MusicDCAE(torch.nn.Module):
             latent = self.dcae.encoder(mel.unsqueeze(0))
             latents.append(latent)
         latents = torch.cat(latents, dim=0)
-        # latent_lengths = (audio_lengths / sr * 44100 / 512 / self.time_dimention_multiple).long()
         latents = (latents - self.shift_factor) * self.scale_factor
         return latents
-        # return latents, latent_lengths
 
     @torch.no_grad()
     def decode(self, latents, audio_lengths=None, sr=None):
@@ -91,9 +83,7 @@ class MusicDCAE(torch.nn.Module):
                 wav = self.vocoder.decode(mels[0]).squeeze(1)
 
             if sr is not None:
-                # resampler = torchaudio.transforms.Resample(44100, sr).to(latents.device).to(latents.dtype)
                 wav = torchaudio.functional.resample(wav, 44100, sr)
-                # wav = resampler(wav)
             else:
                 sr = 44100
             pred_wavs.append(wav)
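Note: torchaudio.functional.resample replaces the commented-out torchaudio.transforms.Resample module above; the functional form keeps no state, so there is no resampler object to construct or move across devices and dtypes. A minimal sketch:

    import torch
    import torchaudio

    wav = torch.randn(1, 44100)
    out = torchaudio.functional.resample(wav, orig_freq=44100, new_freq=22050)  # kernel built per call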
@@ -101,7 +91,6 @@ class MusicDCAE(torch.nn.Module):
         if audio_lengths is not None:
             pred_wavs = [wav[:, :length].cpu() for wav, length in zip(pred_wavs, audio_lengths)]
         return torch.stack(pred_wavs)
-        # return sr, pred_wavs
 
     def forward(self, audios, audio_lengths=None, sr=None):
         latents, latent_lengths = self.encode(audios=audios, audio_lengths=audio_lengths, sr=sr)
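Note: encode() normalizes latents as (latents - shift_factor) * scale_factor, so the decode path must apply the inverse mapping for the round trip to hold. A quick sketch of the arithmetic (the inverse shown here is inferred, not quoted from this diff):

    import torch

    scale_factor, shift_factor = 0.1786, -1.9091
    latents = torch.randn(2, 8, 16, 128)
    normalized = (latents - shift_factor) * scale_factor  # as in encode()
    restored = normalized / scale_factor + shift_factor   # inverse mapping
    assert torch.allclose(latents, restored, atol=1e-5)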
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d
+from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d, Normalize
 import comfy.ops
 import comfy.ldm.models.autoencoder
 ops = comfy.ops.disable_weight_init
@@ -17,11 +17,12 @@ class RMS_norm(nn.Module):
         return F.normalize(x, dim=1) * self.scale * self.gamma
 
 class DnSmpl(nn.Module):
-    def __init__(self, ic, oc, tds=True):
+    def __init__(self, ic, oc, tds=True, refiner_vae=True, op=VideoConv3d):
         super().__init__()
         fct = 2 * 2 * 2 if tds else 1 * 2 * 2
         assert oc % fct == 0
-        self.conv = VideoConv3d(ic, oc // fct, kernel_size=3)
+        self.conv = op(ic, oc // fct, kernel_size=3, stride=1, padding=1)
+        self.refiner_vae = refiner_vae
 
         self.tds = tds
         self.gs = fct * ic // oc
@@ -30,7 +31,7 @@ class DnSmpl(nn.Module):
         r1 = 2 if self.tds else 1
         h = self.conv(x)
 
-        if self.tds:
+        if self.tds and self.refiner_vae:
             hf = h[:, :, :1, :, :]
             b, c, f, ht, wd = hf.shape
             hf = hf.reshape(b, c, f, ht // 2, 2, wd // 2, 2)
@@ -66,6 +67,7 @@ class DnSmpl(nn.Module):
             sc = torch.cat([xf, xn], dim=2)
         else:
             b, c, frms, ht, wd = h.shape
+
             nf = frms // r1
             h = h.reshape(b, c, nf, r1, ht // 2, 2, wd // 2, 2)
             h = h.permute(0, 3, 5, 7, 1, 2, 4, 6)
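Note: the reshape/permute pattern in both branches is a space-to-depth (pixel-unshuffle) move: each 2x2 spatial window (and, when tds is set, a pair of frames) is folded into the channel axis. A standalone sketch of the spatial-only case:

    import torch

    x = torch.randn(1, 4, 8, 16, 16)  # (b, c, f, h, w)
    b, c, f, h, w = x.shape
    y = x.reshape(b, c, f, h // 2, 2, w // 2, 2)  # split each spatial axis into (size/2, 2)
    y = y.permute(0, 4, 6, 1, 2, 3, 5)            # move the two window axes to the front
    y = y.reshape(b, c * 4, f, h // 2, w // 2)    # fold them into channels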
@@ -83,10 +85,11 @@ class DnSmpl(nn.Module):
 
 
 class UpSmpl(nn.Module):
-    def __init__(self, ic, oc, tus=True):
+    def __init__(self, ic, oc, tus=True, refiner_vae=True, op=VideoConv3d):
         super().__init__()
         fct = 2 * 2 * 2 if tus else 1 * 2 * 2
-        self.conv = VideoConv3d(ic, oc * fct, kernel_size=3)
+        self.conv = op(ic, oc * fct, kernel_size=3, stride=1, padding=1)
+        self.refiner_vae = refiner_vae
 
         self.tus = tus
         self.rp = fct * oc // ic
@@ -95,7 +98,7 @@ class UpSmpl(nn.Module):
         r1 = 2 if self.tus else 1
         h = self.conv(x)
 
-        if self.tus:
+        if self.tus and self.refiner_vae:
             hf = h[:, :, :1, :, :]
             b, c, f, ht, wd = hf.shape
             nc = c // (2 * 2)
@@ -148,43 +151,56 @@ class UpSmpl(nn.Module):
 
 class Encoder(nn.Module):
     def __init__(self, in_channels, z_channels, block_out_channels, num_res_blocks,
-                 ffactor_spatial, ffactor_temporal, downsample_match_channel=True, **_):
+                 ffactor_spatial, ffactor_temporal, downsample_match_channel=True, refiner_vae=True, **_):
         super().__init__()
         self.z_channels = z_channels
         self.block_out_channels = block_out_channels
         self.num_res_blocks = num_res_blocks
-        self.conv_in = VideoConv3d(in_channels, block_out_channels[0], 3, 1, 1)
+        self.ffactor_temporal = ffactor_temporal
+
+        self.refiner_vae = refiner_vae
+        if self.refiner_vae:
+            conv_op = VideoConv3d
+            norm_op = RMS_norm
+        else:
+            conv_op = ops.Conv3d
+            norm_op = Normalize
+
+        self.conv_in = conv_op(in_channels, block_out_channels[0], 3, 1, 1)
 
         self.down = nn.ModuleList()
         ch = block_out_channels[0]
         depth = (ffactor_spatial >> 1).bit_length()
-        depth_temporal = ((ffactor_spatial // ffactor_temporal) >> 1).bit_length()
+        depth_temporal = ((ffactor_spatial // self.ffactor_temporal) >> 1).bit_length()
 
         for i, tgt in enumerate(block_out_channels):
            stage = nn.Module()
            stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
                                                     out_channels=tgt,
                                                     temb_channels=0,
-                                                     conv_op=VideoConv3d, norm_op=RMS_norm)
+                                                     conv_op=conv_op, norm_op=norm_op)
                                         for j in range(num_res_blocks)])
            ch = tgt
            if i < depth:
                nxt = block_out_channels[i + 1] if i + 1 < len(block_out_channels) and downsample_match_channel else ch
-                stage.downsample = DnSmpl(ch, nxt, tds=i >= depth_temporal)
+                stage.downsample = DnSmpl(ch, nxt, tds=i >= depth_temporal, refiner_vae=self.refiner_vae, op=conv_op)
                ch = nxt
            self.down.append(stage)
 
        self.mid = nn.Module()
-        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
-        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=RMS_norm)
-        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
+        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
+        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
+        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
 
-        self.norm_out = RMS_norm(ch)
-        self.conv_out = VideoConv3d(ch, z_channels << 1, 3, 1, 1)
+        self.norm_out = norm_op(ch)
+        self.conv_out = conv_op(ch, z_channels << 1, 3, 1, 1)
 
        self.regul = comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer()
 
    def forward(self, x):
+        if not self.refiner_vae and x.shape[2] == 1:
+            x = x.expand(-1, -1, self.ffactor_temporal, -1, -1)
+
        x = self.conv_in(x)
 
        for stage in self.down:
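Note: depth sets how many stages downsample spatially, and depth_temporal sets the stage from which temporal downsampling also applies (tds=i >= depth_temporal). Worked example with the refiner configuration that sd.py passes below (ffactor_spatial=16, ffactor_temporal=4):

    def stage_plan(ffactor_spatial, ffactor_temporal):
        depth = (ffactor_spatial >> 1).bit_length()  # number of 2x spatial downsamples
        depth_temporal = ((ffactor_spatial // ffactor_temporal) >> 1).bit_length()
        return [(i, i >= depth_temporal) for i in range(depth)]

    print(stage_plan(16, 4))  # [(0, False), (1, False), (2, True), (3, True)]
    # four spatial halvings (16x), of which the last two also halve time (4x)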
@@ -200,31 +216,42 @@ class Encoder(nn.Module):
         skip = x.view(b, c // grp, grp, t, h, w).mean(2)
 
         out = self.conv_out(F.silu(self.norm_out(x))) + skip
-        out = self.regul(out)[0]
 
-        out = torch.cat((out[:, :, :1], out), dim=2)
-        out = out.permute(0, 2, 1, 3, 4)
-        b, f_times_2, c, h, w = out.shape
-        out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
-        out = out.permute(0, 2, 1, 3, 4).contiguous()
+        if self.refiner_vae:
+            out = self.regul(out)[0]
+
+            out = torch.cat((out[:, :, :1], out), dim=2)
+            out = out.permute(0, 2, 1, 3, 4)
+            b, f_times_2, c, h, w = out.shape
+            out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
+            out = out.permute(0, 2, 1, 3, 4).contiguous()
 
         return out
 
 class Decoder(nn.Module):
     def __init__(self, z_channels, out_channels, block_out_channels, num_res_blocks,
-                 ffactor_spatial, ffactor_temporal, upsample_match_channel=True, **_):
+                 ffactor_spatial, ffactor_temporal, upsample_match_channel=True, refiner_vae=True, **_):
         super().__init__()
         block_out_channels = block_out_channels[::-1]
         self.z_channels = z_channels
         self.block_out_channels = block_out_channels
         self.num_res_blocks = num_res_blocks
 
+        self.refiner_vae = refiner_vae
+        if self.refiner_vae:
+            conv_op = VideoConv3d
+            norm_op = RMS_norm
+        else:
+            conv_op = ops.Conv3d
+            norm_op = Normalize
+
         ch = block_out_channels[0]
-        self.conv_in = VideoConv3d(z_channels, ch, 3)
+        self.conv_in = conv_op(z_channels, ch, kernel_size=3, stride=1, padding=1)
 
         self.mid = nn.Module()
-        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
-        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=RMS_norm)
-        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
+        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
+        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
+        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
 
         self.up = nn.ModuleList()
         depth = (ffactor_spatial >> 1).bit_length()
@@ -235,25 +262,26 @@ class Decoder(nn.Module):
             stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
                                                      out_channels=tgt,
                                                      temb_channels=0,
-                                                     conv_op=VideoConv3d, norm_op=RMS_norm)
+                                                     conv_op=conv_op, norm_op=norm_op)
                                          for j in range(num_res_blocks + 1)])
             ch = tgt
             if i < depth:
                 nxt = block_out_channels[i + 1] if i + 1 < len(block_out_channels) and upsample_match_channel else ch
-                stage.upsample = UpSmpl(ch, nxt, tus=i < depth_temporal)
+                stage.upsample = UpSmpl(ch, nxt, tus=i < depth_temporal, refiner_vae=self.refiner_vae, op=conv_op)
                 ch = nxt
             self.up.append(stage)
 
-        self.norm_out = RMS_norm(ch)
-        self.conv_out = VideoConv3d(ch, out_channels, 3)
+        self.norm_out = norm_op(ch)
+        self.conv_out = conv_op(ch, out_channels, 3, stride=1, padding=1)
 
     def forward(self, z):
-        z = z.permute(0, 2, 1, 3, 4)
-        b, f, c, h, w = z.shape
-        z = z.reshape(b, f, 2, c // 2, h, w)
-        z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
-        z = z.permute(0, 2, 1, 3, 4)
-        z = z[:, :, 1:]
+        if self.refiner_vae:
+            z = z.permute(0, 2, 1, 3, 4)
+            b, f, c, h, w = z.shape
+            z = z.reshape(b, f, 2, c // 2, h, w)
+            z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
+            z = z.permute(0, 2, 1, 3, 4)
+            z = z[:, :, 1:]
 
         x = self.conv_in(z) + z.repeat_interleave(self.block_out_channels[0] // self.z_channels, 1)
         x = self.mid.block_2(self.mid.attn_1(self.mid.block_1(x)))
@@ -264,4 +292,10 @@ class Decoder(nn.Module):
             if hasattr(stage, 'upsample'):
                 x = stage.upsample(x)
 
-        return self.conv_out(F.silu(self.norm_out(x)))
+        out = self.conv_out(F.silu(self.norm_out(x)))
+
+        if not self.refiner_vae:
+            if z.shape[-3] == 1:
+                out = out[:, :, -1:]
+
+        return out
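Note: taken together, the non-refiner branches give the VAE an image mode: the encoder repeats a single input frame ffactor_temporal times (x.expand), and the decoder keeps only the last output frame when the latent has a single time step. Shape sketch, assuming ffactor_temporal=4 and a 16x spatial factor:

    # encode: (b, 3, 1, H, W) --expand--> (b, 3, 4, H, W) --> latent (b, z, 1, H/16, W/16)
    # decode: (b, z, 1, h, w) --> (b, 3, 4, 16h, 16w) --out[:, :, -1:]--> (b, 3, 1, 16h, 16w)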
@@ -903,7 +903,7 @@ class MotionEncoder_tc(nn.Module):
     def __init__(self,
                  in_dim: int,
                  hidden_dim: int,
-                 num_heads=int,
+                 num_heads: int,
                  need_global=True,
                  dtype=None,
                  device=None,
@@ -468,55 +468,46 @@ class WanVAE(nn.Module):
                               attn_scales, self.temperal_upsample, dropout)
 
     def encode(self, x):
-        self.clear_cache()
+        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.encoder)
         ## cache
         t = x.shape[2]
         iter_ = 1 + (t - 1) // 4
         ## split the encode input x along time into chunks of 1, 4, 4, 4, ...
         for i in range(iter_):
-            self._enc_conv_idx = [0]
+            conv_idx = [0]
             if i == 0:
                 out = self.encoder(
                     x[:, :, :1, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
             else:
                 out_ = self.encoder(
                     x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
                 out = torch.cat([out, out_], 2)
         mu, log_var = self.conv1(out).chunk(2, dim=1)
-        self.clear_cache()
         return mu
 
     def decode(self, z):
-        self.clear_cache()
+        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.decoder)
         # z: [b,c,t,h,w]
 
         iter_ = z.shape[2]
         x = self.conv2(z)
         for i in range(iter_):
-            self._conv_idx = [0]
+            conv_idx = [0]
             if i == 0:
                 out = self.decoder(
                     x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
             else:
                 out_ = self.decoder(
                     x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
                 out = torch.cat([out, out_], 2)
-        self.clear_cache()
         return out
 
-    def clear_cache(self):
-        self._conv_num = count_conv3d(self.decoder)
-        self._conv_idx = [0]
-        self._feat_map = [None] * self._conv_num
-        #cache encode
-        self._enc_conv_num = count_conv3d(self.encoder)
-        self._enc_conv_idx = [0]
-        self._enc_feat_map = [None] * self._enc_conv_num
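Note: the refactor replaces module-level cache attributes (previously set up and torn down by clear_cache) with locals created per call, so nothing leaks between calls and concurrent encodes/decodes no longer share mutable state. The pattern in miniature:

    def encode(self, x):
        conv_idx = [0]                                  # per-call cursor, passed as feat_idx
        feat_map = [None] * count_conv3d(self.encoder)  # per-call feature cache
        ...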
86  comfy/sd.py
@@ -332,35 +332,51 @@ class VAE:
             self.first_stage_model = StageC_coder()
             self.downscale_ratio = 32
             self.latent_channels = 16
-        elif "decoder.conv_in.weight" in sd and sd['decoder.conv_in.weight'].shape[1] == 64:
-            ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
-            self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
-            self.downscale_ratio = 32
-            self.upscale_ratio = 32
-            self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
-            self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
-                                                        encoder_config={'target': "comfy.ldm.hunyuan_video.vae.Encoder", 'params': ddconfig},
-                                                        decoder_config={'target': "comfy.ldm.hunyuan_video.vae.Decoder", 'params': ddconfig})
-
-            self.memory_used_encode = lambda shape, dtype: (700 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
-            self.memory_used_decode = lambda shape, dtype: (700 * shape[2] * shape[3] * 32 * 32) * model_management.dtype_size(dtype)
-
         elif "decoder.conv_in.weight" in sd:
-            #default SD1.x/SD2.x VAE parameters
-            ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
-
-            if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: #Stable diffusion x4 upscaler VAE
-                ddconfig['ch_mult'] = [1, 2, 4]
-                self.downscale_ratio = 4
-                self.upscale_ratio = 4
-
-            self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
-            if 'post_quant_conv.weight' in sd:
-                self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
-            else:
-                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
-                                                            encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
-                                                            decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
+            if sd['decoder.conv_in.weight'].shape[1] == 64:
+                ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
+                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
+                self.downscale_ratio = 32
+                self.upscale_ratio = 32
+                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
+                                                            encoder_config={'target': "comfy.ldm.hunyuan_video.vae.Encoder", 'params': ddconfig},
+                                                            decoder_config={'target': "comfy.ldm.hunyuan_video.vae.Decoder", 'params': ddconfig})
+
+                self.memory_used_encode = lambda shape, dtype: (700 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (700 * shape[2] * shape[3] * 32 * 32) * model_management.dtype_size(dtype)
+            elif sd['decoder.conv_in.weight'].shape[1] == 32:
+                ddconfig = {"block_out_channels": [128, 256, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 16, "ffactor_temporal": 4, "downsample_match_channel": True, "upsample_match_channel": True, "refiner_vae": False}
+                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
+                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+                self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 16, 16)
+                self.upscale_index_formula = (4, 16, 16)
+                self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 16, 16)
+                self.downscale_index_formula = (4, 16, 16)
+                self.latent_dim = 3
+                self.not_video = True
+                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
+                                                            encoder_config={'target': "comfy.ldm.hunyuan_video.vae_refiner.Encoder", 'params': ddconfig},
+                                                            decoder_config={'target': "comfy.ldm.hunyuan_video.vae_refiner.Decoder", 'params': ddconfig})
+
+                self.memory_used_encode = lambda shape, dtype: (2800 * shape[-2] * shape[-1]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (2800 * shape[-3] * shape[-2] * shape[-1] * 16 * 16) * model_management.dtype_size(dtype)
+            else:
+                #default SD1.x/SD2.x VAE parameters
+                ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
+
+                if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: #Stable diffusion x4 upscaler VAE
+                    ddconfig['ch_mult'] = [1, 2, 4]
+                    self.downscale_ratio = 4
+                    self.upscale_ratio = 4
+
+                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
+                if 'post_quant_conv.weight' in sd:
+                    self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
+                else:
+                    self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
+                                                                encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
+                                                                decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
         elif "decoder.layers.1.layers.0.beta" in sd:
             self.first_stage_model = AudioOobleckVAE()
             self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype)
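Note: the 4x temporal factor uses asymmetric formulas because the first frame maps 1:1, so valid pixel frame counts are of the form 4k + 1. Quick check of the lambda pair used above:

    import math

    up = lambda a: max(0, a * 4 - 3)                  # latent frames -> pixel frames
    down = lambda a: max(0, math.floor((a + 3) / 4))  # pixel frames -> latent frames

    print([down(t) for t in (1, 5, 9, 17)])  # [1, 2, 3, 5]
    assert all(up(down(t)) == t for t in (1, 5, 9, 17))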
@@ -636,6 +652,7 @@ class VAE:
     def decode(self, samples_in, vae_options={}):
         self.throw_exception_if_invalid()
         pixel_samples = None
+        do_tile = False
         try:
             memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
             model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
@@ -651,6 +668,13 @@ class VAE:
                     pixel_samples[x:x+batch_number] = out
         except model_management.OOM_EXCEPTION:
             logging.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
+            #NOTE: We don't know what tensors were allocated to stack variables at the time of the
+            #exception and the exception itself refs them all until we get out of this except block.
+            #So we just set a flag for tiler fallback so that tensor gc can happen once the
+            #exception is fully off the books.
+            do_tile = True
+
+        if do_tile:
             dims = samples_in.ndim - 2
             if dims == 1 or self.extra_1d_channel is not None:
                 pixel_samples = self.decode_tiled_1d(samples_in)
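Note: the NOTE comment is the core of this change: retrying the tiled path inside the except block would keep every tensor referenced by the live exception alive during the retry. Reduced to a sketch (full_decode/tiled_decode are hypothetical placeholders):

    do_tile = False
    try:
        out = full_decode(samples)   # may OOM
    except OOM_EXCEPTION:
        do_tile = True               # record the failure only; allocate nothing here
    if do_tile:                      # the exception and its tensor refs are gone by now
        out = tiled_decode(samples)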
@@ -697,6 +721,7 @@ class VAE:
         self.throw_exception_if_invalid()
         pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
         pixel_samples = pixel_samples.movedim(-1, 1)
+        do_tile = False
         if self.latent_dim == 3 and pixel_samples.ndim < 5:
             if not self.not_video:
                 pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
@@ -718,6 +743,13 @@ class VAE:
 
         except model_management.OOM_EXCEPTION:
             logging.warning("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.")
+            #NOTE: We don't know what tensors were allocated to stack variables at the time of the
+            #exception and the exception itself refs them all until we get out of this except block.
+            #So we just set a flag for tiler fallback so that tensor gc can happen once the
+            #exception is fully off the books.
+            do_tile = True
+
+        if do_tile:
             if self.latent_dim == 3:
                 tile = 256
                 overlap = tile // 4
@@ -1605,6 +1605,7 @@ class _IO:
     Model = Model
     ClipVision = ClipVision
     ClipVisionOutput = ClipVisionOutput
+    AudioEncoder = AudioEncoder
     AudioEncoderOutput = AudioEncoderOutput
     StyleModel = StyleModel
     Gligen = Gligen
@@ -152,7 +152,7 @@ def validate_aspect_ratio(
         raise TypeError(
             f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
         )
-    elif calculated_ratio > maximum_ratio:
+    if calculated_ratio > maximum_ratio:
         raise TypeError(
             f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
         )
3  comfy_api_nodes/apis/__init__.py  (generated)
@@ -2,6 +2,7 @@
 # filename: filtered-openapi.yaml
 # timestamp: 2025-07-30T08:54:00+00:00
 
+# pylint: disable
 from __future__ import annotations
 
 from datetime import date, datetime
@@ -1320,6 +1321,7 @@ class KlingTextToVideoModelName(str, Enum):
     kling_v1 = 'kling-v1'
     kling_v1_6 = 'kling-v1-6'
     kling_v2_1_master = 'kling-v2-1-master'
+    kling_v2_5_turbo = 'kling-v2-5-turbo'
 
 
 class KlingVideoGenAspectRatio(str, Enum):
@@ -1354,6 +1356,7 @@ class KlingVideoGenModelName(str, Enum):
     kling_v2_master = 'kling-v2-master'
     kling_v2_1 = 'kling-v2-1'
     kling_v2_1_master = 'kling-v2-1-master'
+    kling_v2_5_turbo = 'kling-v2-5-turbo'
 
 
 class KlingVideoResult(BaseModel):
@@ -95,6 +95,7 @@ import aiohttp
 import asyncio
 import logging
 import io
+import os
 import socket
 from aiohttp.client_exceptions import ClientError, ClientResponseError
 from typing import Dict, Type, Optional, Any, TypeVar, Generic, Callable, Tuple
@@ -499,7 +500,9 @@ class ApiClient:
         else:
             raise ValueError("File must be BytesIO or str path")
 
-        operation_id = f"upload_{upload_url.split('/')[-1]}_{uuid.uuid4().hex[:8]}"
+        parsed = urlparse(upload_url)
+        basename = os.path.basename(parsed.path) or parsed.netloc or "upload"
+        operation_id = f"upload_{basename}_{uuid.uuid4().hex[:8]}"
         request_logger.log_request_response(
             operation_id=operation_id,
             request_method="PUT",
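Note: the old upload_url.split('/')[-1] kept any query string (for example a presigned-URL signature) in the operation id, which then flowed into the log filename; the urlparse version extracts only the path basename:

    import os
    from urllib.parse import urlparse

    url = "https://bucket.s3.amazonaws.com/some/key.png?X-Amz-Signature=abc123"
    print(url.split('/')[-1])                    # key.png?X-Amz-Signature=abc123
    print(os.path.basename(urlparse(url).path))  # key.png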
@@ -532,7 +535,7 @@ class ApiClient:
                 request_method="PUT",
                 request_url=upload_url,
                 response_status_code=e.status if hasattr(e, "status") else None,
-                response_headers=dict(e.headers) if getattr(e, "headers") else None,
+                response_headers=dict(e.headers) if hasattr(e, "headers") else None,
                 response_content=None,
                 error_message=f"{type(e).__name__}: {str(e)}",
             )
@@ -4,16 +4,18 @@ import os
 import datetime
 import json
 import logging
+import re
+import hashlib
+from typing import Any
 
 import folder_paths
 
 # Get the logger instance
 logger = logging.getLogger(__name__)
 
 
 def get_log_directory():
-    """
-    Ensures the API log directory exists within ComfyUI's temp directory
-    and returns its path.
-    """
+    """Ensures the API log directory exists within ComfyUI's temp directory and returns its path."""
     base_temp_dir = folder_paths.get_temp_directory()
     log_dir = os.path.join(base_temp_dir, "api_logs")
     try:
@@ -24,42 +26,77 @@ def get_log_directory():
             return base_temp_dir
     return log_dir
 
-def _format_data_for_logging(data):
+
+def _sanitize_filename_component(name: str) -> str:
+    if not name:
+        return "log"
+    sanitized = re.sub(r"[^A-Za-z0-9._-]+", "_", name)  # Replace disallowed characters with underscore
+    sanitized = sanitized.strip(" ._")  # Windows: trailing dots or spaces are not allowed
+    if not sanitized:
+        sanitized = "log"
+    return sanitized
+
+
+def _short_hash(*parts: str, length: int = 10) -> str:
+    return hashlib.sha1(("|".join(parts)).encode("utf-8")).hexdigest()[:length]
+
+
+def _build_log_filepath(log_dir: str, operation_id: str, request_url: str) -> str:
+    """Build log filepath. We keep it well under common path length limits aiming for <= 240 characters total."""
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+    slug = _sanitize_filename_component(operation_id)  # Best-effort human-readable slug from operation_id
+    h = _short_hash(operation_id or "", request_url or "")  # Short hash ties log to the full operation and URL
+
+    # Compute how much room we have for the slug given the directory length
+    # Keep total path length reasonably below ~260 on Windows.
+    max_total_path = 240
+    prefix = f"{timestamp}_"
+    suffix = f"_{h}.log"
+    if not slug:
+        slug = "op"
+    max_filename_len = max(60, max_total_path - len(log_dir) - 1)
+    max_slug_len = max(8, max_filename_len - len(prefix) - len(suffix))
+    if len(slug) > max_slug_len:
+        slug = slug[:max_slug_len].rstrip(" ._-")
+    return os.path.join(log_dir, f"{prefix}{slug}{suffix}")
+
+
+def _format_data_for_logging(data: Any) -> str:
     """Helper to format data (dict, str, bytes) for logging."""
     if isinstance(data, bytes):
         try:
-            return data.decode('utf-8') # Try to decode as text
+            return data.decode("utf-8")  # Try to decode as text
         except UnicodeDecodeError:
             return f"[Binary data of length {len(data)} bytes]"
     elif isinstance(data, (dict, list)):
         try:
             return json.dumps(data, indent=2, ensure_ascii=False)
         except TypeError:
             return str(data)  # Fallback for non-serializable objects
     return str(data)
 
+
 def log_request_response(
     operation_id: str,
     request_method: str,
     request_url: str,
     request_headers: dict | None = None,
     request_params: dict | None = None,
-    request_data: any = None,
+    request_data: Any = None,
     response_status_code: int | None = None,
     response_headers: dict | None = None,
-    response_content: any = None,
-    error_message: str | None = None
+    response_content: Any = None,
+    error_message: str | None = None,
 ):
     """
     Logs API request and response details to a file in the temp/api_logs directory.
+    Filenames are sanitized and length-limited for cross-platform safety.
+    If we still fail to write, we fall back to appending into api.log.
     """
     log_dir = get_log_directory()
-    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-    filename = f"{timestamp}_{operation_id.replace('/', '_').replace(':', '_')}.log"
-    filepath = os.path.join(log_dir, filename)
-
-    log_content = []
+    filepath = _build_log_filepath(log_dir, operation_id, request_url)
 
+    log_content: list[str] = []
     log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
     log_content.append(f"Operation ID: {operation_id}")
     log_content.append("-" * 30 + " REQUEST " + "-" * 30)
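Note: the 240-character budget is simple arithmetic over the fixed parts: with a 26-character log_dir, a timestamp prefix of 23 characters ("%Y%m%d_%H%M%S_%f" plus "_") and a 15-character "_<10-hex>.log" suffix, max_filename_len = max(60, 240 - 26 - 1) = 213 and max_slug_len = max(8, 213 - 23 - 15) = 175, so only very long operation ids are truncated. A quick check, assuming the helpers above (values illustrative):

    p = _build_log_filepath("/tmp/comfyui_temp/api_logs", "op:" + "A" * 300, "https://api.example.com/v1/upload")
    assert len(p) <= 240 and p.endswith(".log")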
@@ -69,7 +106,7 @@ def log_request_response(
         log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
     if request_params:
         log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
-    if request_data:
+    if request_data is not None:
         log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
 
     log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
@@ -77,7 +114,7 @@ def log_request_response(
         log_content.append(f"Status Code: {response_status_code}")
     if response_headers:
         log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
-    if response_content:
+    if response_content is not None:
         log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
     if error_message:
         log_content.append(f"Error:\n{error_message}")
@@ -89,6 +126,7 @@ def log_request_response(
     except Exception as e:
         logger.error(f"Error writing API log to {filepath}: {e}")
 
+
 if __name__ == '__main__':
     # Example usage (for testing the logger directly)
     logger.setLevel(logging.DEBUG)
@@ -52,7 +52,3 @@ class RodinResourceItem(BaseModel):
 
 class Rodin3DDownloadResponse(BaseModel):
     list: List[RodinResourceItem] = Field(..., description="Source List")
-
-
-
-
@@ -39,6 +39,7 @@ from comfy_api_nodes.apinode_utils import (
     tensor_to_base64_string,
     bytesio_to_image_tensor,
 )
+from comfy_api.util import VideoContainer, VideoCodec
 
 
 GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
@@ -310,7 +311,7 @@ class GeminiNode(ComfyNodeABC):
         Returns:
             List of GeminiPart objects containing the encoded video.
         """
-        from comfy_api.util import VideoContainer, VideoCodec
+
         base_64_string = video_to_base64_string(
             video_input,
             container_format=VideoContainer.MP4,
@@ -490,7 +491,6 @@ class GeminiInputFiles(ComfyNodeABC):
         # Use base64 string directly, not the data URI
         with open(file_path, "rb") as f:
             file_content = f.read()
-        import base64
         base64_str = base64.b64encode(file_content).decode("utf-8")
 
         return GeminiPart(
(File diff suppressed because it is too large.)
@@ -2,11 +2,7 @@ import logging
 from typing import Any, Callable, Optional, TypeVar
 
 import torch
 from typing_extensions import override
-from comfy_api_nodes.util.validation_utils import (
-    get_image_dimensions,
-    validate_image_dimensions,
-)
-
+from comfy_api_nodes.util.validation_utils import validate_image_dimensions
 
 from comfy_api_nodes.apis import (
     MoonvalleyTextToVideoRequest,
@@ -132,47 +128,6 @@ def validate_prompts(
     return True
 
 
-def validate_input_media(width, height, with_frame_conditioning, num_frames_in=None):
-    # inference validation
-    # T = num_frames
-    # in all cases, the following must be true: T divisible by 16 and H,W by 8. in addition...
-    # with image conditioning: H*W must be divisible by 8192
-    # without image conditioning: T divisible by 32
-    if num_frames_in and not num_frames_in % 16 == 0:
-        return False, ("The input video total frame count must be divisible by 16!")
-
-    if height % 8 != 0 or width % 8 != 0:
-        return False, (
-            f"Height ({height}) and width ({width}) must be " "divisible by 8"
-        )
-
-    if with_frame_conditioning:
-        if (height * width) % 8192 != 0:
-            return False, (
-                f"Height * width ({height * width}) must be "
-                "divisible by 8192 for frame conditioning"
-            )
-    else:
-        if num_frames_in and not num_frames_in % 32 == 0:
-            return False, ("The input video total frame count must be divisible by 32!")
-
-
-def validate_input_image(
-    image: torch.Tensor, with_frame_conditioning: bool = False
-) -> None:
-    """
-    Validates the input image adheres to the expectations of the API:
-    - The image resolution should not be less than 300*300px
-    - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
-
-    """
-    height, width = get_image_dimensions(image)
-    validate_input_media(width, height, with_frame_conditioning)
-    validate_image_dimensions(
-        image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
-    )
-
-
 def validate_video_to_video_input(video: VideoInput) -> VideoInput:
     """
     Validates and processes video input for Moonvalley Video-to-Video generation.
@@ -499,7 +454,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
         seed: int,
         steps: int,
     ) -> comfy_io.NodeOutput:
-        validate_input_image(image, True)
+        validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH)
         validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
         width_height = parse_width_height_from_res(resolution)
 
@@ -518,7 +473,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
             height=width_height["height"],
             use_negative_prompts=True,
         )
-        """Upload image to comfy backend to have a URL available for further processing"""
+
         # Get MIME type from tensor - assuming PNG format for image tensors
         mime_type = "image/png"
 
@@ -636,7 +591,6 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
         validated_video = validate_video_to_video_input(video)
         video_url = await upload_video_to_comfyapi(validated_video, auth_kwargs=auth)
 
-        """Validate prompts and inference input"""
         validate_prompts(prompt, negative_prompt)
 
         # Only include motion_intensity for Motion Transfer
(File diff suppressed because it is too large.)
@@ -1,5 +1,7 @@
 from inspect import cleandoc
 from typing import Optional
+from typing_extensions import override
+from io import BytesIO
 from comfy_api_nodes.apis.pixverse_api import (
     PixverseTextVideoRequest,
     PixverseImageVideoRequest,
@@ -26,12 +28,11 @@ from comfy_api_nodes.apinode_utils import (
     tensor_to_bytesio,
     validate_string,
 )
-from comfy.comfy_types.node_typing import IO, ComfyNodeABC
 from comfy_api.input_impl import VideoFromFile
+from comfy_api.latest import ComfyExtension, io as comfy_io
 
 import torch
 import aiohttp
-from io import BytesIO
 
 
 AVERAGE_DURATION_T2V = 32
@@ -72,100 +73,101 @@ async def upload_image_to_pixverse(image: torch.Tensor, auth_kwargs=None):
     return response_upload.Resp.img_id
 
 
-class PixverseTemplateNode:
+class PixverseTemplateNode(comfy_io.ComfyNode):
     """
     Select template for PixVerse Video generation.
     """
 
-    RETURN_TYPES = (PixverseIO.TEMPLATE,)
-    RETURN_NAMES = ("pixverse_template",)
-    FUNCTION = "create_template"
-    CATEGORY = "api node/video/PixVerse"
-
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "template": (list(pixverse_templates.keys()),),
-            }
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseTemplateNode",
+            display_name="PixVerse Template",
+            category="api node/video/PixVerse",
+            inputs=[
+                comfy_io.Combo.Input("template", options=[list(pixverse_templates.keys())]),
+            ],
+            outputs=[comfy_io.Custom(PixverseIO.TEMPLATE).Output(display_name="pixverse_template")],
+        )
 
-    def create_template(self, template: str):
+    @classmethod
+    def execute(cls, template: str) -> comfy_io.NodeOutput:
         template_id = pixverse_templates.get(template, None)
         if template_id is None:
             raise Exception(f"Template '{template}' is not recognized.")
         # just return the integer
-        return (template_id,)
+        return comfy_io.NodeOutput(template_id)
 
 
-class PixverseTextToVideoNode(ComfyNodeABC):
+class PixverseTextToVideoNode(comfy_io.ComfyNode):
     """
     Generates videos based on prompt and output_size.
     """
 
-    RETURN_TYPES = (IO.VIDEO,)
-    DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/video/PixVerse"
-
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the video generation",
-                    },
-                ),
-                "aspect_ratio": ([ratio.value for ratio in PixverseAspectRatio],),
-                "quality": (
-                    [resolution.value for resolution in PixverseQuality],
-                    {
-                        "default": PixverseQuality.res_540p,
-                    },
-                ),
-                "duration_seconds": ([dur.value for dur in PixverseDuration],),
-                "motion_mode": ([mode.value for mode in PixverseMotionMode],),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 2147483647,
-                        "control_after_generate": True,
-                        "tooltip": "Seed for video generation.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "pixverse_template": (
-                    PixverseIO.TEMPLATE,
-                    {
-                        "tooltip": "An optional template to influence style of generation, created by the PixVerse Template node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseTextToVideoNode",
+            display_name="PixVerse Text to Video",
+            category="api node/video/PixVerse",
+            description=cleandoc(cls.__doc__ or ""),
+            inputs=[
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt for the video generation",
+                ),
+                comfy_io.Combo.Input(
+                    "aspect_ratio",
+                    options=[ratio.value for ratio in PixverseAspectRatio],
+                ),
+                comfy_io.Combo.Input(
+                    "quality",
+                    options=[resolution.value for resolution in PixverseQuality],
+                    default=PixverseQuality.res_540p,
+                ),
+                comfy_io.Combo.Input(
+                    "duration_seconds",
+                    options=[dur.value for dur in PixverseDuration],
+                ),
+                comfy_io.Combo.Input(
+                    "motion_mode",
+                    options=[mode.value for mode in PixverseMotionMode],
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation.",
+                ),
+                comfy_io.String.Input(
+                    "negative_prompt",
+                    default="",
+                    multiline=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                comfy_io.Custom(PixverseIO.TEMPLATE).Input(
+                    "pixverse_template",
+                    tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         prompt: str,
         aspect_ratio: str,
         quality: str,
@ -174,9 +176,7 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
seed,
|
seed,
|
||||||
negative_prompt: str = None,
|
negative_prompt: str = None,
|
||||||
pixverse_template: int = None,
|
pixverse_template: int = None,
|
||||||
unique_id: Optional[str] = None,
|
) -> comfy_io.NodeOutput:
|
||||||
**kwargs,
|
|
||||||
):
|
|
||||||
validate_string(prompt, strip_whitespace=False)
|
validate_string(prompt, strip_whitespace=False)
|
||||||
# 1080p is limited to 5 seconds duration
|
# 1080p is limited to 5 seconds duration
|
||||||
# only normal motion_mode supported for 1080p or for non-5 second duration
|
# only normal motion_mode supported for 1080p or for non-5 second duration
|
||||||
@ -186,6 +186,10 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
elif duration_seconds != PixverseDuration.dur_5:
|
elif duration_seconds != PixverseDuration.dur_5:
|
||||||
motion_mode = PixverseMotionMode.normal
|
motion_mode = PixverseMotionMode.normal
|
||||||
|
|
||||||
|
auth = {
|
||||||
|
"auth_token": cls.hidden.auth_token_comfy_org,
|
||||||
|
"comfy_api_key": cls.hidden.api_key_comfy_org,
|
||||||
|
}
|
||||||
operation = SynchronousOperation(
|
operation = SynchronousOperation(
|
||||||
endpoint=ApiEndpoint(
|
endpoint=ApiEndpoint(
|
||||||
path="/proxy/pixverse/video/text/generate",
|
path="/proxy/pixverse/video/text/generate",
|
||||||
@ -203,7 +207,7 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
template_id=pixverse_template,
|
template_id=pixverse_template,
|
||||||
seed=seed,
|
seed=seed,
|
||||||
),
|
),
|
||||||
auth_kwargs=kwargs,
|
auth_kwargs=auth,
|
||||||
)
|
)
|
||||||
response_api = await operation.execute()
|
response_api = await operation.execute()
|
||||||
|
|
||||||
@ -224,8 +228,8 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
PixverseStatus.deleted,
|
PixverseStatus.deleted,
|
||||||
],
|
],
|
||||||
status_extractor=lambda x: x.Resp.status,
|
status_extractor=lambda x: x.Resp.status,
|
||||||
auth_kwargs=kwargs,
|
auth_kwargs=auth,
|
||||||
node_id=unique_id,
|
node_id=cls.hidden.unique_id,
|
||||||
result_url_extractor=get_video_url_from_response,
|
result_url_extractor=get_video_url_from_response,
|
||||||
estimated_duration=AVERAGE_DURATION_T2V,
|
estimated_duration=AVERAGE_DURATION_T2V,
|
||||||
)
|
)
|
||||||
@ -233,77 +237,75 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(response_poll.Resp.url) as vid_response:
|
async with session.get(response_poll.Resp.url) as vid_response:
|
||||||
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
|
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
|
||||||
|
|
||||||
|
|
||||||
-class PixverseImageToVideoNode(ComfyNodeABC):
+class PixverseImageToVideoNode(comfy_io.ComfyNode):
     """
     Generates videos based on prompt and output_size.
     """

-    RETURN_TYPES = (IO.VIDEO,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/video/PixVerse"
+    @classmethod
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseImageToVideoNode",
+            display_name="PixVerse Image to Video",
+            category="api node/video/PixVerse",
+            description=cleandoc(cls.__doc__ or ""),
+            inputs=[
+                comfy_io.Image.Input("image"),
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt for the video generation",
+                ),
+                comfy_io.Combo.Input(
+                    "quality",
+                    options=[resolution.value for resolution in PixverseQuality],
+                    default=PixverseQuality.res_540p,
+                ),
+                comfy_io.Combo.Input(
+                    "duration_seconds",
+                    options=[dur.value for dur in PixverseDuration],
+                ),
+                comfy_io.Combo.Input(
+                    "motion_mode",
+                    options=[mode.value for mode in PixverseMotionMode],
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation.",
+                ),
+                comfy_io.String.Input(
+                    "negative_prompt",
+                    default="",
+                    multiline=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                comfy_io.Custom(PixverseIO.TEMPLATE).Input(
+                    "pixverse_template",
+                    tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )

     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE,),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the video generation",
-                    },
-                ),
-                "quality": (
-                    [resolution.value for resolution in PixverseQuality],
-                    {
-                        "default": PixverseQuality.res_540p,
-                    },
-                ),
-                "duration_seconds": ([dur.value for dur in PixverseDuration],),
-                "motion_mode": ([mode.value for mode in PixverseMotionMode],),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 2147483647,
-                        "control_after_generate": True,
-                        "tooltip": "Seed for video generation.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "pixverse_template": (
-                    PixverseIO.TEMPLATE,
-                    {
-                        "tooltip": "An optional template to influence style of generation, created by the PixVerse Template node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         image: torch.Tensor,
         prompt: str,
         quality: str,
@@ -312,11 +314,13 @@ class PixverseImageToVideoNode(ComfyNodeABC):
         seed,
         negative_prompt: str = None,
         pixverse_template: int = None,
-        unique_id: Optional[str] = None,
-        **kwargs,
-    ):
+    ) -> comfy_io.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        img_id = await upload_image_to_pixverse(image, auth_kwargs=kwargs)
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
+        img_id = await upload_image_to_pixverse(image, auth_kwargs=auth)

         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
@@ -343,7 +347,7 @@ class PixverseImageToVideoNode(ComfyNodeABC):
                 template_id=pixverse_template,
                 seed=seed,
             ),
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
         response_api = await operation.execute()

@@ -364,8 +368,8 @@ class PixverseImageToVideoNode(ComfyNodeABC):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=kwargs,
-            node_id=unique_id,
+            auth_kwargs=auth,
+            node_id=cls.hidden.unique_id,
             result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_I2V,
         )
@@ -373,72 +377,71 @@ class PixverseImageToVideoNode(ComfyNodeABC):

         async with aiohttp.ClientSession() as session:
             async with session.get(response_poll.Resp.url) as vid_response:
-                return (VideoFromFile(BytesIO(await vid_response.content.read())),)
+                return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))

-class PixverseTransitionVideoNode(ComfyNodeABC):
+class PixverseTransitionVideoNode(comfy_io.ComfyNode):
     """
     Generates videos based on prompt and output_size.
     """

-    RETURN_TYPES = (IO.VIDEO,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/video/PixVerse"
+    @classmethod
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseTransitionVideoNode",
+            display_name="PixVerse Transition Video",
+            category="api node/video/PixVerse",
+            description=cleandoc(cls.__doc__ or ""),
+            inputs=[
+                comfy_io.Image.Input("first_frame"),
+                comfy_io.Image.Input("last_frame"),
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt for the video generation",
+                ),
+                comfy_io.Combo.Input(
+                    "quality",
+                    options=[resolution.value for resolution in PixverseQuality],
+                    default=PixverseQuality.res_540p,
+                ),
+                comfy_io.Combo.Input(
+                    "duration_seconds",
+                    options=[dur.value for dur in PixverseDuration],
+                ),
+                comfy_io.Combo.Input(
+                    "motion_mode",
+                    options=[mode.value for mode in PixverseMotionMode],
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation.",
+                ),
+                comfy_io.String.Input(
+                    "negative_prompt",
+                    default="",
+                    multiline=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )

     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "first_frame": (IO.IMAGE,),
-                "last_frame": (IO.IMAGE,),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the video generation",
-                    },
-                ),
-                "quality": (
-                    [resolution.value for resolution in PixverseQuality],
-                    {
-                        "default": PixverseQuality.res_540p,
-                    },
-                ),
-                "duration_seconds": ([dur.value for dur in PixverseDuration],),
-                "motion_mode": ([mode.value for mode in PixverseMotionMode],),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 2147483647,
-                        "control_after_generate": True,
-                        "tooltip": "Seed for video generation.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         first_frame: torch.Tensor,
         last_frame: torch.Tensor,
         prompt: str,
@@ -447,12 +450,14 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
         motion_mode: str,
         seed,
         negative_prompt: str = None,
-        unique_id: Optional[str] = None,
-        **kwargs,
-    ):
+    ) -> comfy_io.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=kwargs)
-        last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=kwargs)
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
+        first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=auth)
+        last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=auth)

         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
@@ -479,7 +484,7 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
                 negative_prompt=negative_prompt if negative_prompt else None,
                 seed=seed,
             ),
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
         response_api = await operation.execute()

@@ -500,8 +505,8 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=kwargs,
-            node_id=unique_id,
+            auth_kwargs=auth,
+            node_id=cls.hidden.unique_id,
             result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_T2V,
         )
@@ -509,19 +514,19 @@ class PixverseTransitionVideoNode(ComfyNodeABC):

         async with aiohttp.ClientSession() as session:
             async with session.get(response_poll.Resp.url) as vid_response:
-                return (VideoFromFile(BytesIO(await vid_response.content.read())),)
+                return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))

-NODE_CLASS_MAPPINGS = {
-    "PixverseTextToVideoNode": PixverseTextToVideoNode,
-    "PixverseImageToVideoNode": PixverseImageToVideoNode,
-    "PixverseTransitionVideoNode": PixverseTransitionVideoNode,
-    "PixverseTemplateNode": PixverseTemplateNode,
-}
-
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "PixverseTextToVideoNode": "PixVerse Text to Video",
-    "PixverseImageToVideoNode": "PixVerse Image to Video",
-    "PixverseTransitionVideoNode": "PixVerse Transition Video",
-    "PixverseTemplateNode": "PixVerse Template",
-}
+class PixVerseExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
+        return [
+            PixverseTextToVideoNode,
+            PixverseImageToVideoNode,
+            PixverseTransitionVideoNode,
+            PixverseTemplateNode,
+        ]
+
+
+async def comfy_entrypoint() -> PixVerseExtension:
+    return PixVerseExtension()
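All four PixVerse nodes above follow the same V1-to-V3 conversion that recurs through the rest of this commit: the INPUT_TYPES/RETURN_TYPES/FUNCTION class attributes become a declarative define_schema, the instance method becomes a classmethod execute returning NodeOutput, and the module-level mapping dicts become a ComfyExtension with an entrypoint. A minimal sketch of the pattern (the ExampleNode name and its echo behavior are invented for illustration; the io calls mirror the ones used in the diff):

from typing_extensions import override
from comfy_api.latest import ComfyExtension, io


class ExampleNode(io.ComfyNode):
    @classmethod
    def define_schema(cls) -> io.Schema:
        # Replaces the V1 INPUT_TYPES / RETURN_TYPES / FUNCTION / CATEGORY attributes.
        return io.Schema(
            node_id="ExampleNode",
            category="examples",
            inputs=[io.String.Input("text", multiline=True, default="")],
            outputs=[io.String.Output()],
        )

    @classmethod
    def execute(cls, text: str) -> io.NodeOutput:
        # V1 nodes returned bare tuples; V3 nodes wrap results in NodeOutput.
        return io.NodeOutput(text)


class ExampleExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        # Replaces the NODE_CLASS_MAPPINGS / NODE_DISPLAY_NAME_MAPPINGS dicts.
        return [ExampleNode]


async def comfy_entrypoint() -> ExampleExtension:
    return ExampleExtension()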
@@ -38,48 +38,48 @@ from PIL import UnidentifiedImageError


 async def handle_recraft_file_request(
     image: torch.Tensor,
     path: str,
     mask: torch.Tensor=None,
     total_pixels=4096*4096,
     timeout=1024,
     request=None,
     auth_kwargs: dict[str,str] = None,
 ) -> list[BytesIO]:
     """
     Handle sending common Recraft file-only request to get back file bytes.
     """
     if request is None:
         request = EmptyRequest()

     files = {
         'image': tensor_to_bytesio(image, total_pixels=total_pixels).read()
     }
     if mask is not None:
         files['mask'] = tensor_to_bytesio(mask, total_pixels=total_pixels).read()

     operation = SynchronousOperation(
         endpoint=ApiEndpoint(
             path=path,
             method=HttpMethod.POST,
             request_model=type(request),
             response_model=RecraftImageGenerationResponse,
         ),
         request=request,
         files=files,
         content_type="multipart/form-data",
         auth_kwargs=auth_kwargs,
         multipart_parser=recraft_multipart_parser,
     )
     response: RecraftImageGenerationResponse = await operation.execute()
     all_bytesio = []
     if response.image is not None:
         all_bytesio.append(await download_url_to_bytesio(response.image.url, timeout=timeout))
     else:
         for data in response.data:
             all_bytesio.append(await download_url_to_bytesio(data.url, timeout=timeout))

     return all_bytesio


 def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, converted_to_check: list[list]=None, is_list=False) -> dict:
@@ -107,7 +107,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
         # if a list already exists, just extend it with data
         for check_list in lists_to_check:
             for conv_tuple in check_list:
-                if conv_tuple[0] == parent_key and type(conv_tuple[1]) is list:
+                if conv_tuple[0] == parent_key and isinstance(conv_tuple[1], list):
                     conv_tuple[1].append(formatter(data))
                     return True
         return False
@@ -119,7 +119,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
     if formatter is None:
         formatter = lambda v: v  # Multipart representation of value

-    if type(data) is not dict:
+    if not isinstance(data, dict):
         # if a list already exists, just extend it with data
         added = handle_converted_lists(data, parent_key, converted_to_check)
         if added:
@@ -136,9 +136,9 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co

     for key, value in data.items():
         current_key = key if parent_key is None else f"{parent_key}[{key}]"
-        if type(value) is dict:
+        if isinstance(value, dict):
             converted.extend(recraft_multipart_parser(value, current_key, formatter, next_check).items())
-        elif type(value) is list:
+        elif isinstance(value, list):
             for ind, list_value in enumerate(value):
                 iter_key = f"{current_key}[]"
                 converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items())
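The type(x) is ... to isinstance(...) changes above are more than style: isinstance also accepts subclasses, so subclassed containers keep flowing through the parser instead of falling through to the scalar branch. A quick standard-library illustration (values invented):

from collections import OrderedDict

d = OrderedDict(style="vector_illustration")
print(type(d) is dict)      # False: OrderedDict is a subclass of dict, not dict itself
print(isinstance(d, dict))  # True: the parser now recurses into it as intended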
(File diff suppressed because it is too large.)
@@ -360,7 +360,7 @@ class RecordAudio:
     def load(self, audio):
         audio_path = folder_paths.get_annotated_filepath(audio)

-        waveform, sample_rate = torchaudio.load(audio_path)
+        waveform, sample_rate = load(audio_path)
         audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
         return (audio, )

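The load() swap above still produces the AUDIO dict convention used throughout these nodes: a (batch, channels, samples) waveform tensor plus its sample rate. A hand-built example (a synthetic sine wave standing in for a recorded file):

import torch

sample_rate = 44100
t = torch.linspace(0, 1, sample_rate)
waveform = torch.sin(2 * torch.pi * 440 * t).unsqueeze(0)                # (channels, samples)
audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}  # add batch dim
print(audio["waveform"].shape)  # torch.Size([1, 1, 44100])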
@@ -1,44 +1,62 @@
 import folder_paths
 import comfy.audio_encoders.audio_encoders
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io


-class AudioEncoderLoader:
+class AudioEncoderLoader(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "audio_encoder_name": (folder_paths.get_filename_list("audio_encoders"), ),
-                             }}
-    RETURN_TYPES = ("AUDIO_ENCODER",)
-    FUNCTION = "load_model"
-
-    CATEGORY = "loaders"
-
-    def load_model(self, audio_encoder_name):
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="AudioEncoderLoader",
+            category="loaders",
+            inputs=[
+                io.Combo.Input(
+                    "audio_encoder_name",
+                    options=folder_paths.get_filename_list("audio_encoders"),
+                ),
+            ],
+            outputs=[io.AudioEncoder.Output()],
+        )
+
+    @classmethod
+    def execute(cls, audio_encoder_name) -> io.NodeOutput:
         audio_encoder_name = folder_paths.get_full_path_or_raise("audio_encoders", audio_encoder_name)
         sd = comfy.utils.load_torch_file(audio_encoder_name, safe_load=True)
         audio_encoder = comfy.audio_encoders.audio_encoders.load_audio_encoder_from_sd(sd)
         if audio_encoder is None:
             raise RuntimeError("ERROR: audio encoder file is invalid and does not contain a valid model.")
-        return (audio_encoder,)
+        return io.NodeOutput(audio_encoder)


-class AudioEncoderEncode:
+class AudioEncoderEncode(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "audio_encoder": ("AUDIO_ENCODER",),
-                              "audio": ("AUDIO",),
-                             }}
-    RETURN_TYPES = ("AUDIO_ENCODER_OUTPUT",)
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning"
-
-    def encode(self, audio_encoder, audio):
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="AudioEncoderEncode",
+            category="conditioning",
+            inputs=[
+                io.AudioEncoder.Input("audio_encoder"),
+                io.Audio.Input("audio"),
+            ],
+            outputs=[io.AudioEncoderOutput.Output()],
+        )
+
+    @classmethod
+    def execute(cls, audio_encoder, audio) -> io.NodeOutput:
         output = audio_encoder.encode_audio(audio["waveform"], audio["sample_rate"])
-        return (output,)
+        return io.NodeOutput(output)


-NODE_CLASS_MAPPINGS = {
-    "AudioEncoderLoader": AudioEncoderLoader,
-    "AudioEncoderEncode": AudioEncoderEncode,
-}
+class AudioEncoder(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            AudioEncoderLoader,
+            AudioEncoderEncode,
+        ]
+
+
+async def comfy_entrypoint() -> AudioEncoder:
+    return AudioEncoder()
@@ -1,34 +1,41 @@
 # code adapted from https://github.com/exx8/differential-diffusion

+from typing_extensions import override
+
 import torch
+from comfy_api.latest import ComfyExtension, io

-class DifferentialDiffusion():
+
+class DifferentialDiffusion(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL", ),
-            },
-            "optional": {
-                "strength": ("FLOAT", {
-                    "default": 1.0,
-                    "min": 0.0,
-                    "max": 1.0,
-                    "step": 0.01,
-                }),
-            }
-        }
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "apply"
-    CATEGORY = "_for_testing"
-    INIT = False
+    def define_schema(cls):
+        return io.Schema(
+            node_id="DifferentialDiffusion",
+            display_name="Differential Diffusion",
+            category="_for_testing",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input(
+                    "strength",
+                    default=1.0,
+                    min=0.0,
+                    max=1.0,
+                    step=0.01,
+                    optional=True,
+                ),
+            ],
+            outputs=[io.Model.Output()],
+            is_experimental=True,
+        )

-    def apply(self, model, strength=1.0):
+    @classmethod
+    def execute(cls, model, strength=1.0) -> io.NodeOutput:
         model = model.clone()
-        model.set_model_denoise_mask_function(lambda *args, **kwargs: self.forward(*args, **kwargs, strength=strength))
-        return (model, )
+        model.set_model_denoise_mask_function(lambda *args, **kwargs: cls.forward(*args, **kwargs, strength=strength))
+        return io.NodeOutput(model)

-    def forward(self, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict, strength: float):
+    @classmethod
+    def forward(cls, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict, strength: float):
         model = extra_options["model"]
         step_sigmas = extra_options["sigmas"]
         sigma_to = model.inner_model.model_sampling.sigma_min
@@ -53,9 +60,13 @@ class DifferentialDiffusion():
         return binary_mask


-NODE_CLASS_MAPPINGS = {
-    "DifferentialDiffusion": DifferentialDiffusion,
-}
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "DifferentialDiffusion": "Differential Diffusion",
-}
+class DifferentialDiffusionExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            DifferentialDiffusion,
+        ]
+
+
+async def comfy_entrypoint() -> DifferentialDiffusionExtension:
+    return DifferentialDiffusionExtension()
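The forward body is mostly elided by the hunk, but the idea behind the denoise-mask patch is visible in what remains: the soft per-pixel mask is compared against a threshold derived from the current position in the sigma schedule, so higher-valued regions unlock earlier and change more. A toy illustration of that thresholding (mask values and thresholds invented; the real threshold comes from step_sigmas):

import torch

denoise_mask = torch.tensor([0.1, 0.5, 0.9])  # per-pixel change strength
for threshold in (0.8, 0.4, 0.05):            # stands in for schedule progress
    binary_mask = (denoise_mask >= threshold).float()
    print(threshold, binary_mask.tolist())
# 0.8  -> [0.0, 0.0, 1.0]   only the strongest region changes at first
# 0.4  -> [0.0, 1.0, 1.0]
# 0.05 -> [1.0, 1.0, 1.0]   eventually every masked pixel participates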
@@ -1,26 +1,38 @@
 import node_helpers
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io


-class ReferenceLatent:
+class ReferenceLatent(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"conditioning": ("CONDITIONING", ),
-                             },
-                "optional": {"latent": ("LATENT", ),}
-                }
-
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "append"
-
-    CATEGORY = "advanced/conditioning/edit_models"
-    DESCRIPTION = "This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images."
-
-    def append(self, conditioning, latent=None):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ReferenceLatent",
+            category="advanced/conditioning/edit_models",
+            description="This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images.",
+            inputs=[
+                io.Conditioning.Input("conditioning"),
+                io.Latent.Input("latent", optional=True),
+            ],
+            outputs=[
+                io.Conditioning.Output(),
+            ]
+        )
+
+    @classmethod
+    def execute(cls, conditioning, latent=None) -> io.NodeOutput:
         if latent is not None:
             conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [latent["samples"]]}, append=True)
-        return (conditioning, )
+        return io.NodeOutput(conditioning)


-NODE_CLASS_MAPPINGS = {
-    "ReferenceLatent": ReferenceLatent,
-}
+class EditModelExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            ReferenceLatent,
+        ]
+
+
+def comfy_entrypoint() -> EditModelExtension:
+    return EditModelExtension()
comfy_extras/nodes_eps.py (new file, +74 lines)
@@ -0,0 +1,74 @@
+from typing_extensions import override
+
+from comfy_api.latest import ComfyExtension, io
+
+
+class EpsilonScaling(io.ComfyNode):
+    """
+    Implements the Epsilon Scaling method from 'Elucidating the Exposure Bias in Diffusion Models'
+    (https://arxiv.org/abs/2308.15321v6).
+
+    This method mitigates exposure bias by scaling the predicted noise during sampling,
+    which can significantly improve sample quality. This implementation uses the "uniform schedule"
+    recommended by the paper for its practicality and effectiveness.
+    """
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="Epsilon Scaling",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input(
+                    "scaling_factor",
+                    default=1.005,
+                    min=0.5,
+                    max=1.5,
+                    step=0.001,
+                    display_mode=io.NumberDisplay.number,
+                ),
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model, scaling_factor) -> io.NodeOutput:
+        # Prevent division by zero, though the UI's min value should prevent this.
+        if scaling_factor == 0:
+            scaling_factor = 1e-9
+
+        def epsilon_scaling_function(args):
+            """
+            This function is applied after the CFG guidance has been calculated.
+            It recalculates the denoised latent by scaling the predicted noise.
+            """
+            denoised = args["denoised"]
+            x = args["input"]
+
+            noise_pred = x - denoised
+
+            scaled_noise_pred = noise_pred / scaling_factor
+
+            new_denoised = x - scaled_noise_pred
+
+            return new_denoised
+
+        # Clone the model patcher to avoid modifying the original model in place
+        model_clone = model.clone()
+
+        model_clone.set_model_sampler_post_cfg_function(epsilon_scaling_function)
+
+        return io.NodeOutput(model_clone)
+
+
+class EpsilonScalingExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            EpsilonScaling,
+        ]
+
+async def comfy_entrypoint() -> EpsilonScalingExtension:
+    return EpsilonScalingExtension()
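The arithmetic inside epsilon_scaling_function is easy to verify by hand: with a scaling_factor just above 1, slightly less noise is subtracted at each step, nudging the prediction toward the current sample. A standalone check (tensor values invented for illustration):

import torch

x = torch.tensor([1.0, 2.0])         # noisy latent handed to the sampler
denoised = torch.tensor([0.4, 1.6])  # model's denoised prediction
s = 1.005                            # scaling_factor

noise_pred = x - denoised            # implied noise: [0.6, 0.4]
new_denoised = x - noise_pred / s    # [0.4030, 1.6020] (approx)
print(new_denoised)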
@@ -1,6 +1,8 @@
 # from https://github.com/zju-pi/diff-sampler/tree/main/gits-main
 import numpy as np
 import torch
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io

 def loglinear_interp(t_steps, num_steps):
     """
@@ -333,25 +335,28 @@ NOISE_LEVELS = {
     ],
 }

-class GITSScheduler:
+class GITSScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"coeff": ("FLOAT", {"default": 1.20, "min": 0.80, "max": 1.50, "step": 0.05}),
-                     "steps": ("INT", {"default": 10, "min": 2, "max": 1000}),
-                     "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                     }
-                }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
-
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, coeff, steps, denoise):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="GITSScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Float.Input("coeff", default=1.20, min=0.80, max=1.50, step=0.05),
+                io.Int.Input("steps", default=10, min=2, max=1000),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Sigmas.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, coeff, steps, denoise):
         total_steps = steps
         if denoise < 1.0:
             if denoise <= 0.0:
-                return (torch.FloatTensor([]),)
+                return io.NodeOutput(torch.FloatTensor([]))
             total_steps = round(steps * denoise)

         if steps <= 20:
@@ -362,8 +367,16 @@ class GITSScheduler:

         sigmas = sigmas[-(total_steps + 1):]
         sigmas[-1] = 0
-        return (torch.FloatTensor(sigmas), )
+        return io.NodeOutput(torch.FloatTensor(sigmas))

-NODE_CLASS_MAPPINGS = {
-    "GITSScheduler": GITSScheduler,
-}
+
+class GITSSchedulerExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            GITSScheduler,
+        ]
+
+
+async def comfy_entrypoint() -> GITSSchedulerExtension:
+    return GITSSchedulerExtension()
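The denoise handling in execute keeps only the tail of the schedule: total_steps = round(steps * denoise), then the last total_steps + 1 sigmas are taken and the final one pinned to zero. A quick check with invented sigma values:

import torch

sigmas = [14.6, 8.0, 4.2, 2.1, 1.0, 0.4, 0.1]  # hypothetical 6-step schedule
steps, denoise = 6, 0.5
total_steps = round(steps * denoise)           # 3

tail = sigmas[-(total_steps + 1):]             # [2.1, 1.0, 0.4, 0.1]
tail[-1] = 0                                   # final sigma pinned to zero
print(torch.FloatTensor(tail))                 # tensor([2.1000, 1.0000, 0.4000, 0.0000])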
@@ -1,21 +1,30 @@
 import torch

-class InstructPixToPixConditioning:
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
+
+
+class InstructPixToPixConditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "vae": ("VAE", ),
-                             "pixels": ("IMAGE", ),
-                             }}
-
-    RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/instructpix2pix"
-
-    def encode(self, positive, negative, pixels, vae):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="InstructPixToPixConditioning",
+            category="conditioning/instructpix2pix",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Image.Input("pixels"),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, positive, negative, pixels, vae) -> io.NodeOutput:
         x = (pixels.shape[1] // 8) * 8
         y = (pixels.shape[2] // 8) * 8

@@ -38,8 +47,17 @@ class InstructPixToPixConditioning:
             n = [t[0], d]
             c.append(n)
         out.append(c)
-        return (out[0], out[1], out_latent)
+        return io.NodeOutput(out[0], out[1], out_latent)


-NODE_CLASS_MAPPINGS = {
-    "InstructPixToPixConditioning": InstructPixToPixConditioning,
-}
+class InstructPix2PixExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            InstructPixToPixConditioning,
+        ]
+
+
+async def comfy_entrypoint() -> InstructPix2PixExtension:
+    return InstructPix2PixExtension()
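The execute body above (largely elided by the hunk) starts by snapping the input image to dimensions the VAE can encode, i.e. the largest multiples of 8 that fit. The rounding in isolation (sizes invented):

pixels_height, pixels_width = 517, 771  # hypothetical pixels.shape[1], pixels.shape[2]
x = (pixels_height // 8) * 8            # 512
y = (pixels_width // 8) * 8             # 768
print(x, y)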
@@ -1,4 +1,3 @@
-import io
 import nodes
 import node_helpers
 import torch
@@ -8,46 +7,60 @@ import comfy.utils
 import math
 import numpy as np
 import av
+from io import BytesIO
+from typing_extensions import override
 from comfy.ldm.lightricks.symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
+from comfy_api.latest import ComfyExtension, io

-class EmptyLTXVLatentVideo:
+class EmptyLTXVLatentVideo(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                              "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                              "length": ("INT", {"default": 97, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
-    RETURN_TYPES = ("LATENT",)
-    FUNCTION = "generate"
-
-    CATEGORY = "latent/video/ltxv"
-
-    def generate(self, width, height, length, batch_size=1):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="EmptyLTXVLatentVideo",
+            category="latent/video/ltxv",
+            inputs=[
+                io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("length", default=97, min=1, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+            ],
+            outputs=[
+                io.Latent.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
         latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
-        return ({"samples": latent}, )
+        return io.NodeOutput({"samples": latent})


-class LTXVImgToVideo:
+class LTXVImgToVideo(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "vae": ("VAE",),
-                             "image": ("IMAGE",),
-                             "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                             "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                             "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0}),
-                             }}
-
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    CATEGORY = "conditioning/video_models"
-    FUNCTION = "generate"
-
-    def generate(self, positive, negative, image, vae, width, height, length, batch_size, strength):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LTXVImgToVideo",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Image.Input("image"),
+                io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("length", default=97, min=9, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("strength", default=1.0, min=0.0, max=1.0),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, positive, negative, image, vae, width, height, length, batch_size, strength) -> io.NodeOutput:
         pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
         encode_pixels = pixels[:, :, :, :3]
         t = vae.encode(encode_pixels)
@@ -62,7 +75,7 @@ class LTXVImgToVideo:
         )
         conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength

-        return (positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask}, )
+        return io.NodeOutput(positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask})


 def conditioning_get_any_value(conditioning, key, default=None):
@@ -93,35 +106,46 @@ def get_keyframe_idxs(cond):
     num_keyframes = torch.unique(keyframe_idxs[:, 0]).shape[0]
     return keyframe_idxs, num_keyframes

-class LTXVAddGuide:
+class LTXVAddGuide(io.ComfyNode):
+    NUM_PREFIX_FRAMES = 2
+    PATCHIFIER = SymmetricPatchifier(1)
+
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "vae": ("VAE",),
-                             "latent": ("LATENT",),
-                             "image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames."
-                                                 "If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}),
-                             "frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999,
-                                                   "tooltip": "Frame index to start the conditioning at. For single-frame images or "
-                                                              "videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
-                                                              "frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
-                                                              "the nearest multiple of 8. Negative values are counted from the end of the video."}),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                             }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LTXVAddGuide",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Latent.Input("latent"),
+                io.Image.Input(
+                    "image",
+                    tooltip="Image or video to condition the latent video on. Must be 8*n + 1 frames. "
+                            "If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames.",
+                ),
+                io.Int.Input(
+                    "frame_idx",
+                    default=0,
+                    min=-9999,
+                    max=9999,
+                    tooltip="Frame index to start the conditioning at. "
+                            "For single-frame images or videos with 1-8 frames, any frame_idx value is acceptable. "
+                            "For videos with 9+ frames, frame_idx must be divisible by 8, otherwise it will be rounded "
+                            "down to the nearest multiple of 8. Negative values are counted from the end of the video.",
+                ),
+                io.Float.Input("strength", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )

-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    CATEGORY = "conditioning/video_models"
-    FUNCTION = "generate"
-
-    def __init__(self):
-        self._num_prefix_frames = 2
-        self._patchifier = SymmetricPatchifier(1)
-
-    def encode(self, vae, latent_width, latent_height, images, scale_factors):
+    @classmethod
+    def encode(cls, vae, latent_width, latent_height, images, scale_factors):
         time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
         images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
         pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
@@ -129,7 +153,8 @@ class LTXVAddGuide:
         t = vae.encode(encode_pixels)
         return encode_pixels, t

-    def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors):
+    @classmethod
+    def get_latent_index(cls, cond, latent_length, guide_length, frame_idx, scale_factors):
         time_scale_factor, _, _ = scale_factors
         _, num_keyframes = get_keyframe_idxs(cond)
         latent_count = latent_length - num_keyframes
@@ -141,9 +166,10 @@ class LTXVAddGuide:

         return frame_idx, latent_idx

-    def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
+    @classmethod
+    def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors):
         keyframe_idxs, _ = get_keyframe_idxs(cond)
-        _, latent_coords = self._patchifier.patchify(guiding_latent)
+        _, latent_coords = cls.PATCHIFIER.patchify(guiding_latent)
         pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0)  # we need the causal fix only if we're placing the new latents at index 0
         pixel_coords[:, 0] += frame_idx
         if keyframe_idxs is None:
@@ -152,8 +178,9 @@ class LTXVAddGuide:
             keyframe_idxs = torch.cat([keyframe_idxs, pixel_coords], dim=2)
         return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})

-    def append_keyframe(self, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
-        _, latent_idx = self.get_latent_index(
+    @classmethod
+    def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
+        _, latent_idx = cls.get_latent_index(
             cond=positive,
             latent_length=latent_image.shape[2],
             guide_length=guiding_latent.shape[2],
@@ -162,8 +189,8 @@ class LTXVAddGuide:
         )
         noise_mask[:, :, latent_idx:latent_idx + guiding_latent.shape[2]] = 1.0

-        positive = self.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
-        negative = self.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
+        positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
+        negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)

         mask = torch.full(
             (noise_mask.shape[0], 1, guiding_latent.shape[2], noise_mask.shape[3], noise_mask.shape[4]),
@@ -176,7 +203,8 @@ class LTXVAddGuide:
         noise_mask = torch.cat([noise_mask, mask], dim=2)
         return positive, negative, latent_image, noise_mask

-    def replace_latent_frames(self, latent_image, noise_mask, guiding_latent, latent_idx, strength):
+    @classmethod
+    def replace_latent_frames(cls, latent_image, noise_mask, guiding_latent, latent_idx, strength):
         cond_length = guiding_latent.shape[2]
         assert latent_image.shape[2] >= latent_idx + cond_length, "Conditioning frames exceed the length of the latent sequence."

@@ -195,20 +223,21 @@ class LTXVAddGuide:

         return latent_image, noise_mask

-    def generate(self, positive, negative, vae, latent, image, frame_idx, strength):
+    @classmethod
+    def execute(cls, positive, negative, vae, latent, image, frame_idx, strength) -> io.NodeOutput:
         scale_factors = vae.downscale_index_formula
         latent_image = latent["samples"]
         noise_mask = get_noise_mask(latent)

         _, _, latent_length, latent_height, latent_width = latent_image.shape
-        image, t = self.encode(vae, latent_width, latent_height, image, scale_factors)
+        image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)

-        frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
+        frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
         assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."

-        num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
+        num_prefix_frames = min(cls.NUM_PREFIX_FRAMES, t.shape[2])

-        positive, negative, latent_image, noise_mask = self.append_keyframe(
+        positive, negative, latent_image, noise_mask = cls.append_keyframe(
             positive,
             negative,
             frame_idx,
@@ -223,9 +252,9 @@ class LTXVAddGuide:

         t = t[:, :, num_prefix_frames:]
         if t.shape[2] == 0:
-            return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
+            return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})

-        latent_image, noise_mask = self.replace_latent_frames(
+        latent_image, noise_mask = cls.replace_latent_frames(
             latent_image,
             noise_mask,
             t,
@@ -233,34 +262,35 @@ class LTXVAddGuide:
             strength,
         )

-        return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
+        return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})


-class LTXVCropGuides:
+class LTXVCropGuides(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "latent": ("LATENT",),
-                             }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LTXVCropGuides",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Latent.Input("latent"),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )

-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    CATEGORY = "conditioning/video_models"
-    FUNCTION = "crop"
-
-    def __init__(self):
-        self._patchifier = SymmetricPatchifier(1)
-
-    def crop(self, positive, negative, latent):
+    @classmethod
+    def execute(cls, positive, negative, latent) -> io.NodeOutput:
         latent_image = latent["samples"].clone()
         noise_mask = get_noise_mask(latent)

         _, num_keyframes = get_keyframe_idxs(positive)
         if num_keyframes == 0:
-            return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
+            return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
||||||
|
|
||||||
latent_image = latent_image[:, :, :-num_keyframes]
|
latent_image = latent_image[:, :, :-num_keyframes]
|
||||||
noise_mask = noise_mask[:, :, :-num_keyframes]
|
noise_mask = noise_mask[:, :, :-num_keyframes]
|
||||||
@ -268,44 +298,52 @@ class LTXVCropGuides:
|
|||||||
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
|
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
|
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
|
||||||
|
|
||||||
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
|
||||||
|
|
||||||
|
|
||||||
class LTXVConditioning:
|
class LTXVConditioning(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": {"positive": ("CONDITIONING", ),
|
return io.Schema(
|
||||||
"negative": ("CONDITIONING", ),
|
node_id="LTXVConditioning",
|
||||||
"frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
|
category="conditioning/video_models",
|
||||||
}}
|
inputs=[
|
||||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
|
io.Conditioning.Input("positive"),
|
||||||
RETURN_NAMES = ("positive", "negative")
|
io.Conditioning.Input("negative"),
|
||||||
FUNCTION = "append"
|
io.Float.Input("frame_rate", default=25.0, min=0.0, max=1000.0, step=0.01),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Conditioning.Output(display_name="positive"),
|
||||||
|
io.Conditioning.Output(display_name="negative"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
CATEGORY = "conditioning/video_models"
|
@classmethod
|
||||||
|
def execute(cls, positive, negative, frame_rate) -> io.NodeOutput:
|
||||||
def append(self, positive, negative, frame_rate):
|
|
||||||
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
|
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
|
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
|
||||||
return (positive, negative)
|
return io.NodeOutput(positive, negative)
|
||||||
|
|
||||||
|
|
||||||
class ModelSamplingLTXV:
|
class ModelSamplingLTXV(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": { "model": ("MODEL",),
|
return io.Schema(
|
||||||
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
node_id="ModelSamplingLTXV",
|
||||||
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
category="advanced/model",
|
||||||
},
|
inputs=[
|
||||||
"optional": {"latent": ("LATENT",), }
|
io.Model.Input("model"),
|
||||||
}
|
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
|
||||||
|
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
|
||||||
|
io.Latent.Input("latent", optional=True),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Model.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("MODEL",)
|
@classmethod
|
||||||
FUNCTION = "patch"
|
def execute(cls, model, max_shift, base_shift, latent=None) -> io.NodeOutput:
|
||||||
|
|
||||||
CATEGORY = "advanced/model"
|
|
||||||
|
|
||||||
def patch(self, model, max_shift, base_shift, latent=None):
|
|
||||||
m = model.clone()
|
m = model.clone()
|
||||||
|
|
||||||
if latent is None:
|
if latent is None:
|
||||||
@ -329,37 +367,41 @@ class ModelSamplingLTXV:
|
|||||||
model_sampling.set_parameters(shift=shift)
|
model_sampling.set_parameters(shift=shift)
|
||||||
m.add_object_patch("model_sampling", model_sampling)
|
m.add_object_patch("model_sampling", model_sampling)
|
||||||
|
|
||||||
return (m, )
|
return io.NodeOutput(m)
|
||||||
|
|
||||||
|
|
||||||
class LTXVScheduler:
|
class LTXVScheduler(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required":
|
return io.Schema(
|
||||||
{"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
|
node_id="LTXVScheduler",
|
||||||
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
category="sampling/custom_sampling/schedulers",
|
||||||
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
inputs=[
|
||||||
"stretch": ("BOOLEAN", {
|
io.Int.Input("steps", default=20, min=1, max=10000),
|
||||||
"default": True,
|
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
|
||||||
"tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
|
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
|
||||||
}),
|
io.Boolean.Input(
|
||||||
"terminal": (
|
id="stretch",
|
||||||
"FLOAT",
|
default=True,
|
||||||
{
|
tooltip="Stretch the sigmas to be in the range [terminal, 1].",
|
||||||
"default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
|
),
|
||||||
"tooltip": "The terminal value of the sigmas after stretching."
|
io.Float.Input(
|
||||||
},
|
id="terminal",
|
||||||
),
|
default=0.1,
|
||||||
},
|
min=0.0,
|
||||||
"optional": {"latent": ("LATENT",), }
|
max=0.99,
|
||||||
}
|
step=0.01,
|
||||||
|
tooltip="The terminal value of the sigmas after stretching.",
|
||||||
|
),
|
||||||
|
io.Latent.Input("latent", optional=True),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Sigmas.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("SIGMAS",)
|
@classmethod
|
||||||
CATEGORY = "sampling/custom_sampling/schedulers"
|
def execute(cls, steps, max_shift, base_shift, stretch, terminal, latent=None) -> io.NodeOutput:
|
||||||
|
|
||||||
FUNCTION = "get_sigmas"
|
|
||||||
|
|
||||||
def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
|
|
||||||
if latent is None:
|
if latent is None:
|
||||||
tokens = 4096
|
tokens = 4096
|
||||||
else:
|
else:
|
||||||
@ -389,7 +431,7 @@ class LTXVScheduler:
|
|||||||
stretched = 1.0 - (one_minus_z / scale_factor)
|
stretched = 1.0 - (one_minus_z / scale_factor)
|
||||||
sigmas[non_zero_mask] = stretched
|
sigmas[non_zero_mask] = stretched
|
||||||
|
|
||||||
return (sigmas,)
|
return io.NodeOutput(sigmas)
|
||||||
|
|
||||||
def encode_single_frame(output_file, image_array: np.ndarray, crf):
|
def encode_single_frame(output_file, image_array: np.ndarray, crf):
|
||||||
container = av.open(output_file, "w", format="mp4")
|
container = av.open(output_file, "w", format="mp4")
|
||||||
@ -423,52 +465,54 @@ def preprocess(image: torch.Tensor, crf=29):
|
|||||||
return image
|
return image
|
||||||
|
|
||||||
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
|
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
|
||||||
with io.BytesIO() as output_file:
|
with BytesIO() as output_file:
|
||||||
encode_single_frame(output_file, image_array, crf)
|
encode_single_frame(output_file, image_array, crf)
|
||||||
video_bytes = output_file.getvalue()
|
video_bytes = output_file.getvalue()
|
||||||
with io.BytesIO(video_bytes) as video_file:
|
with BytesIO(video_bytes) as video_file:
|
||||||
image_array = decode_single_frame(video_file)
|
image_array = decode_single_frame(video_file)
|
||||||
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
|
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
|
||||||
return tensor
|
return tensor
|
||||||
|
|
||||||
|
|
||||||
class LTXVPreprocess:
|
class LTXVPreprocess(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {
|
return io.Schema(
|
||||||
"required": {
|
node_id="LTXVPreprocess",
|
||||||
"image": ("IMAGE",),
|
category="image",
|
||||||
"img_compression": (
|
inputs=[
|
||||||
"INT",
|
io.Image.Input("image"),
|
||||||
{
|
io.Int.Input(
|
||||||
"default": 35,
|
id="img_compression", default=35, min=0, max=100, tooltip="Amount of compression to apply on image."
|
||||||
"min": 0,
|
|
||||||
"max": 100,
|
|
||||||
"tooltip": "Amount of compression to apply on image.",
|
|
||||||
},
|
|
||||||
),
|
),
|
||||||
}
|
],
|
||||||
}
|
outputs=[
|
||||||
|
io.Image.Output(display_name="output_image"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
FUNCTION = "preprocess"
|
@classmethod
|
||||||
RETURN_TYPES = ("IMAGE",)
|
def execute(cls, image, img_compression) -> io.NodeOutput:
|
||||||
RETURN_NAMES = ("output_image",)
|
|
||||||
CATEGORY = "image"
|
|
||||||
|
|
||||||
def preprocess(self, image, img_compression):
|
|
||||||
output_images = []
|
output_images = []
|
||||||
for i in range(image.shape[0]):
|
for i in range(image.shape[0]):
|
||||||
output_images.append(preprocess(image[i], img_compression))
|
output_images.append(preprocess(image[i], img_compression))
|
||||||
return (torch.stack(output_images),)
|
return io.NodeOutput(torch.stack(output_images))
|
||||||
|
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
class LtxvExtension(ComfyExtension):
|
||||||
"EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
|
@override
|
||||||
"LTXVImgToVideo": LTXVImgToVideo,
|
async def get_node_list(self) -> list[type[io.ComfyNode]]:
|
||||||
"ModelSamplingLTXV": ModelSamplingLTXV,
|
return [
|
||||||
"LTXVConditioning": LTXVConditioning,
|
EmptyLTXVLatentVideo,
|
||||||
"LTXVScheduler": LTXVScheduler,
|
LTXVImgToVideo,
|
||||||
"LTXVAddGuide": LTXVAddGuide,
|
ModelSamplingLTXV,
|
||||||
"LTXVPreprocess": LTXVPreprocess,
|
LTXVConditioning,
|
||||||
"LTXVCropGuides": LTXVCropGuides,
|
LTXVScheduler,
|
||||||
}
|
LTXVAddGuide,
|
||||||
|
LTXVPreprocess,
|
||||||
|
LTXVCropGuides,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
async def comfy_entrypoint() -> LtxvExtension:
|
||||||
|
return LtxvExtension()
|
||||||
|
|||||||
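Every file below repeats the migration pattern just applied to the LTXV nodes: the INPUT_TYPES/RETURN_TYPES/FUNCTION class attributes become a define_schema() classmethod returning io.Schema, the entry-point method becomes a classmethod named execute returning io.NodeOutput, and the module-level NODE_CLASS_MAPPINGS dict becomes a ComfyExtension subclass plus a comfy_entrypoint() coroutine. A minimal sketch of the whole pattern, with hypothetical node and extension names (the imports are exactly the ones this commit adds to each file):

from typing_extensions import override
from comfy_api.latest import ComfyExtension, io


class MyNode(io.ComfyNode):  # hypothetical example, not part of this commit
    @classmethod
    def define_schema(cls) -> io.Schema:
        return io.Schema(
            node_id="MyNode",
            category="example",
            inputs=[io.Int.Input("value", default=0, min=0, max=10)],
            outputs=[io.Int.Output()],
        )

    @classmethod
    def execute(cls, value) -> io.NodeOutput:
        # Positional NodeOutput arguments line up with the outputs list above.
        return io.NodeOutput(value + 1)


class MyExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [MyNode]


async def comfy_entrypoint() -> MyExtension:  # ComfyUI calls this to load the extension
    return MyExtension()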
comfy_extras/nodes_morphology.py
@@ -1,24 +1,34 @@
 import torch
 import comfy.model_management
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 
 from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat
 import kornia.color
 
 
-class Morphology:
+class Morphology(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"image": ("IMAGE",),
-                             "operation": (["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],),
-                             "kernel_size": ("INT", {"default": 3, "min": 3, "max": 999, "step": 1}),
-                             }}
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "process"
-
-    CATEGORY = "image/postprocessing"
-
-    def process(self, image, operation, kernel_size):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="Morphology",
+            display_name="ImageMorphology",
+            category="image/postprocessing",
+            inputs=[
+                io.Image.Input("image"),
+                io.Combo.Input(
+                    "operation",
+                    options=["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],
+                ),
+                io.Int.Input("kernel_size", default=3, min=3, max=999, step=1),
+            ],
+            outputs=[
+                io.Image.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, image, operation, kernel_size) -> io.NodeOutput:
         device = comfy.model_management.get_torch_device()
         kernel = torch.ones(kernel_size, kernel_size, device=device)
         image_k = image.to(device).movedim(-1, 1)
@@ -39,49 +49,63 @@ class Morphology:
         else:
             raise ValueError(f"Invalid operation {operation} for morphology. Must be one of 'erode', 'dilate', 'open', 'close', 'gradient', 'tophat', 'bottomhat'")
         img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1)
-        return (img_out,)
+        return io.NodeOutput(img_out)
 
 
-class ImageRGBToYUV:
+class ImageRGBToYUV(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE",),
-                              }}
-
-    RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE")
-    RETURN_NAMES = ("Y", "U", "V")
-    FUNCTION = "execute"
-
-    CATEGORY = "image/batch"
-
-    def execute(self, image):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ImageRGBToYUV",
+            category="image/batch",
+            inputs=[
+                io.Image.Input("image"),
+            ],
+            outputs=[
+                io.Image.Output(display_name="Y"),
+                io.Image.Output(display_name="U"),
+                io.Image.Output(display_name="V"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, image) -> io.NodeOutput:
         out = kornia.color.rgb_to_ycbcr(image.movedim(-1, 1)).movedim(1, -1)
-        return (out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image))
+        return io.NodeOutput(out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image))
 
-class ImageYUVToRGB:
+class ImageYUVToRGB(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"Y": ("IMAGE",),
-                             "U": ("IMAGE",),
-                             "V": ("IMAGE",),
-                             }}
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "execute"
-
-    CATEGORY = "image/batch"
-
-    def execute(self, Y, U, V):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ImageYUVToRGB",
+            category="image/batch",
+            inputs=[
+                io.Image.Input("Y"),
+                io.Image.Input("U"),
+                io.Image.Input("V"),
+            ],
+            outputs=[
+                io.Image.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, Y, U, V) -> io.NodeOutput:
         image = torch.cat([torch.mean(Y, dim=-1, keepdim=True), torch.mean(U, dim=-1, keepdim=True), torch.mean(V, dim=-1, keepdim=True)], dim=-1)
         out = kornia.color.ycbcr_to_rgb(image.movedim(-1, 1)).movedim(1, -1)
-        return (out,)
+        return io.NodeOutput(out)
 
-NODE_CLASS_MAPPINGS = {
-    "Morphology": Morphology,
-    "ImageRGBToYUV": ImageRGBToYUV,
-    "ImageYUVToRGB": ImageYUVToRGB,
-}
 
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "Morphology": "ImageMorphology",
-}
+class MorphologyExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            Morphology,
+            ImageRGBToYUV,
+            ImageYUVToRGB,
+        ]
+
+
+async def comfy_entrypoint() -> MorphologyExtension:
+    return MorphologyExtension()
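For orientation, the Morphology node's heavy lifting is kornia's morphology operators on B,C,H,W tensors (the movedim calls above convert from ComfyUI's B,H,W,C layout). A standalone sketch of the dilation call it wraps, runnable with just torch and kornia installed:

import torch
from kornia.morphology import dilation

img = torch.rand(1, 3, 64, 64)   # B, C, H, W - the layout the node produces via movedim(-1, 1)
kernel = torch.ones(3, 3)        # structuring element, matching the node's default kernel_size=3
out = dilation(img, kernel)      # erosion/opening/closing/gradient/top_hat/bottom_hat share this signature
print(out.shape)                 # torch.Size([1, 3, 64, 64])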
comfy_extras/nodes_optimalsteps.py
@@ -1,9 +1,12 @@
 # from https://github.com/bebebe666/OptimalSteps
 
 
 import numpy as np
 import torch
 
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
+
 
 def loglinear_interp(t_steps, num_steps):
     """
     Performs log-linear interpolation of a given array of decreasing numbers.
@@ -23,25 +26,28 @@ NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0
 "Chroma": [0.992, 0.99, 0.988, 0.985, 0.982, 0.978, 0.973, 0.968, 0.961, 0.953, 0.943, 0.931, 0.917, 0.9, 0.881, 0.858, 0.832, 0.802, 0.769, 0.731, 0.69, 0.646, 0.599, 0.55, 0.501, 0.451, 0.402, 0.355, 0.311, 0.27, 0.232, 0.199, 0.169, 0.143, 0.12, 0.101, 0.084, 0.07, 0.058, 0.048, 0.001],
 }
 
-class OptimalStepsScheduler:
+class OptimalStepsScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model_type": (["FLUX", "Wan", "Chroma"], ),
-                     "steps": ("INT", {"default": 20, "min": 3, "max": 1000}),
-                     "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                    }
-                }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
-
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, model_type, steps, denoise):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="OptimalStepsScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Combo.Input("model_type", options=["FLUX", "Wan", "Chroma"]),
+                io.Int.Input("steps", default=20, min=3, max=1000),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Sigmas.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model_type, steps, denoise) -> io.NodeOutput:
         total_steps = steps
         if denoise < 1.0:
             if denoise <= 0.0:
-                return (torch.FloatTensor([]),)
+                return io.NodeOutput(torch.FloatTensor([]))
             total_steps = round(steps * denoise)
 
         sigmas = NOISE_LEVELS[model_type][:]
@@ -50,8 +56,16 @@ class OptimalStepsScheduler:
 
         sigmas = sigmas[-(total_steps + 1):]
         sigmas[-1] = 0
-        return (torch.FloatTensor(sigmas), )
+        return io.NodeOutput(torch.FloatTensor(sigmas))
 
-NODE_CLASS_MAPPINGS = {
-    "OptimalStepsScheduler": OptimalStepsScheduler,
-}
+
+class OptimalStepsExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            OptimalStepsScheduler,
+        ]
+
+
+async def comfy_entrypoint() -> OptimalStepsExtension:
+    return OptimalStepsExtension()
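The docstring above only names the technique; a minimal sketch of log-linear interpolation over a decreasing schedule, written from that description rather than copied from the repo:

import numpy as np

def loglinear_interp_sketch(t_steps, num_steps):
    # Interpolate a decreasing sequence in log-space, then map back.
    xs = np.linspace(0, 1, len(t_steps))
    ys = np.log(np.asarray(t_steps)[::-1])  # reverse so values increase for np.interp
    new_xs = np.linspace(0, 1, num_steps)
    new_ys = np.interp(new_xs, xs, ys)
    return np.exp(new_ys)[::-1].copy()      # back to decreasing order

print(loglinear_interp_sketch([0.99, 0.5, 0.1, 0.01], 6))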
comfy_extras/nodes_pag.py
@@ -3,25 +3,30 @@
 
 #My modified one here is more basic but has less chances of breaking with ComfyUI updates.
 
+from typing_extensions import override
+
 import comfy.model_patcher
 import comfy.samplers
+from comfy_api.latest import ComfyExtension, io
 
-class PerturbedAttentionGuidance:
+
+class PerturbedAttentionGuidance(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL",),
-                "scale": ("FLOAT", {"default": 3.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": 0.01}),
-            }
-        }
-
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-
-    CATEGORY = "model_patches/unet"
-
-    def patch(self, model, scale):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="PerturbedAttentionGuidance",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("scale", default=3.0, min=0.0, max=100.0, step=0.01, round=0.01),
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model, scale) -> io.NodeOutput:
         unet_block = "middle"
         unet_block_id = 0
         m = model.clone()
@@ -49,8 +54,16 @@ class PerturbedAttentionGuidance:
 
         m.set_model_sampler_post_cfg_function(post_cfg_function)
 
-        return (m,)
+        return io.NodeOutput(m)
 
-NODE_CLASS_MAPPINGS = {
-    "PerturbedAttentionGuidance": PerturbedAttentionGuidance,
-}
+
+class PAGExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            PerturbedAttentionGuidance,
+        ]
+
+
+async def comfy_entrypoint() -> PAGExtension:
+    return PAGExtension()
comfy_extras/nodes_stable3d.py
@@ -1,6 +1,8 @@
 import torch
 import nodes
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 
 def camera_embeddings(elevation, azimuth):
     elevation = torch.as_tensor([elevation])
@@ -20,26 +22,31 @@ def camera_embeddings(elevation, azimuth):
     return embeddings
 
 
-class StableZero123_Conditioning:
+class StableZero123_Conditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                            }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="StableZero123_Conditioning",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
+
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -51,30 +58,35 @@ class StableZero123_Conditioning:
         positive = [[cond, {"concat_latent_image": t}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
         latent = torch.zeros([batch_size, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent})
+        return io.NodeOutput(positive, negative, {"samples":latent})
 
-class StableZero123_Conditioning_Batched:
+class StableZero123_Conditioning_Batched(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                            }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
+    def define_schema(cls):
+        return io.Schema(
+            node_id="StableZero123_Conditioning_Batched",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("elevation_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
 
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment):
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -93,27 +105,32 @@ class StableZero123_Conditioning_Batched:
         positive = [[cond, {"concat_latent_image": t}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
         latent = torch.zeros([batch_size, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
+        return io.NodeOutput(positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
 
-class SV3D_Conditioning:
+class SV3D_Conditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "video_frames": ("INT", {"default": 21, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -90.0, "max": 90.0, "step": 0.1, "round": False}),
-                            }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SV3D_Conditioning",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("video_frames", default=21, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-90.0, max=90.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
 
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, video_frames, elevation):
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, video_frames, elevation) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -133,11 +150,17 @@ class SV3D_Conditioning:
         positive = [[pooled, {"concat_latent_image": t, "elevation": elevations, "azimuth": azimuths}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t), "elevation": elevations, "azimuth": azimuths}]]
         latent = torch.zeros([video_frames, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent})
+        return io.NodeOutput(positive, negative, {"samples":latent})
 
 
-NODE_CLASS_MAPPINGS = {
-    "StableZero123_Conditioning": StableZero123_Conditioning,
-    "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched,
-    "SV3D_Conditioning": SV3D_Conditioning,
-}
+class Stable3DExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            StableZero123_Conditioning,
+            StableZero123_Conditioning_Batched,
+            SV3D_Conditioning,
+        ]
+
+
+async def comfy_entrypoint() -> Stable3DExtension:
+    return Stable3DExtension()
comfy_extras/nodes_tomesd.py
@@ -1,7 +1,9 @@
 #Taken from: https://github.com/dbolya/tomesd
 
 import torch
-from typing import Tuple, Callable
+from typing import Tuple, Callable, Optional
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 import math
 
 def do_nothing(x: torch.Tensor, mode:str=None):
@@ -144,33 +146,45 @@ def get_functions(x, ratio, original_shape):
 
 
 
-class TomePatchModel:
+class TomePatchModel(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "model": ("MODEL",),
-                              "ratio": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}),
-                            }}
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-
-    CATEGORY = "model_patches/unet"
-
-    def patch(self, model, ratio):
-        self.u = None
+    def define_schema(cls):
+        return io.Schema(
+            node_id="TomePatchModel",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("ratio", default=0.3, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[io.Model.Output()],
+        )
+
+    @classmethod
+    def execute(cls, model, ratio) -> io.NodeOutput:
+        u: Optional[Callable] = None
+
         def tomesd_m(q, k, v, extra_options):
+            nonlocal u
             #NOTE: In the reference code get_functions takes x (input of the transformer block) as the argument instead of q
             #however from my basic testing it seems that using q instead gives better results
-            m, self.u = get_functions(q, ratio, extra_options["original_shape"])
+            m, u = get_functions(q, ratio, extra_options["original_shape"])
             return m(q), k, v
         def tomesd_u(n, extra_options):
-            return self.u(n)
+            nonlocal u
+            return u(n)
 
         m = model.clone()
         m.set_model_attn1_patch(tomesd_m)
         m.set_model_attn1_output_patch(tomesd_u)
-        return (m, )
+        return io.NodeOutput(m)
 
 
-NODE_CLASS_MAPPINGS = {
-    "TomePatchModel": TomePatchModel,
-}
+class TomePatchModelExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            TomePatchModel,
+        ]
+
+
+async def comfy_entrypoint() -> TomePatchModelExtension:
+    return TomePatchModelExtension()
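The one non-mechanical change in this file: execute is now a classmethod, so the merge/unmerge pair can no longer stash state on self, and it is threaded through a nonlocal closure variable instead. The same pattern in isolation, as a runnable pure-Python sketch:

from typing import Callable, Optional

def make_patch_pair():
    u: Optional[Callable] = None

    def merge(x):
        nonlocal u              # write the unmerge fn captured here...
        u = lambda y: y + 1     # stand-in for the real unmerge function
        return x * 2

    def unmerge(y):
        nonlocal u              # ...and read it back in the sibling closure
        return u(y)

    return merge, unmerge

merge, unmerge = make_patch_pair()
print(unmerge(merge(3)))        # 7: merge runs first and installs the unmerge fn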
comfy_extras/nodes_torch_compile.py
@@ -1,23 +1,39 @@
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 from comfy_api.torch_helpers import set_torch_compile_wrapper
 
 
-class TorchCompileModel:
+class TorchCompileModel(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "model": ("MODEL",),
-                              "backend": (["inductor", "cudagraphs"],),
-                            }}
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-
-    CATEGORY = "_for_testing"
-    EXPERIMENTAL = True
-
-    def patch(self, model, backend):
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="TorchCompileModel",
+            category="_for_testing",
+            inputs=[
+                io.Model.Input("model"),
+                io.Combo.Input(
+                    "backend",
+                    options=["inductor", "cudagraphs"],
+                ),
+            ],
+            outputs=[io.Model.Output()],
+            is_experimental=True,
+        )
+
+    @classmethod
+    def execute(cls, model, backend) -> io.NodeOutput:
         m = model.clone()
         set_torch_compile_wrapper(model=m, backend=backend)
-        return (m, )
+        return io.NodeOutput(m)
 
-NODE_CLASS_MAPPINGS = {
-    "TorchCompileModel": TorchCompileModel,
-}
+
+class TorchCompileExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            TorchCompileModel,
+        ]
+
+
+async def comfy_entrypoint() -> TorchCompileExtension:
+    return TorchCompileExtension()
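set_torch_compile_wrapper is ComfyUI's own helper, but the two backends the node exposes are plain torch.compile backends; the underlying PyTorch call, shown for orientation (standard API, nothing ComfyUI-specific):

import torch

net = torch.nn.Linear(8, 8)
compiled = torch.compile(net, backend="inductor")  # "cudagraphs" is the node's other option
print(compiled(torch.rand(2, 8)).shape)            # torch.Size([2, 8])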
comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.62"
+__version__ = "0.3.63"
custom_nodes/example_node.py.example
@@ -1,96 +1,70 @@
-class Example:
+from typing_extensions import override
+
+from comfy_api.latest import ComfyExtension, io
+
+
+class Example(io.ComfyNode):
     """
-    A example node
+    An example node
 
     Class methods
     -------------
-    INPUT_TYPES (dict):
-        Tell the main program input parameters of nodes.
-    IS_CHANGED:
+    define_schema (io.Schema):
+        Tell the main program the metadata, input, output parameters of nodes.
+    fingerprint_inputs:
         optional method to control when the node is re executed.
-
-    Attributes
-    ----------
-    RETURN_TYPES (`tuple`):
-        The type of each element in the output tuple.
-    RETURN_NAMES (`tuple`):
-        Optional: The name of each output in the output tuple.
-    FUNCTION (`str`):
-        The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
-    OUTPUT_NODE ([`bool`]):
-        If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
-        The backend iterates on these output nodes and tries to execute all their parents if their parent graph is properly connected.
-        Assumed to be False if not present.
-    CATEGORY (`str`):
-        The category the node should appear in the UI.
-    DEPRECATED (`bool`):
-        Indicates whether the node is deprecated. Deprecated nodes are hidden by default in the UI, but remain
-        functional in existing workflows that use them.
-    EXPERIMENTAL (`bool`):
-        Indicates whether the node is experimental. Experimental nodes are marked as such in the UI and may be subject to
-        significant changes or removal in future versions. Use with caution in production workflows.
-    execute(s) -> tuple || None:
-        The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
-        For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"` then it must be `foo`.
+    check_lazy_status:
+        optional method to control list of input names that need to be evaluated.
     """
-    def __init__(self):
-        pass
 
     @classmethod
-    def INPUT_TYPES(s):
+    def define_schema(cls) -> io.Schema:
         """
-        Return a dictionary which contains config for all input fields.
-        Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
-        Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
-        The type can be a list for selection.
-
-        Returns: `dict`:
-            - Key input_fields_group (`string`): Can be either required, hidden or optional. A node class must have property `required`
-            - Value input_fields (`dict`): Contains input fields config:
-                * Key field_name (`string`): Name of a entry-point method's argument
-                * Value field_config (`tuple`):
-                    + First value is a string indicate the type of field or a list for selection.
-                    + Second value is a config for type "INT", "STRING" or "FLOAT".
+        Return a schema which contains all information about the node.
+        Some types: "Model", "Vae", "Clip", "Conditioning", "Latent", "Image", "Int", "String", "Float", "Combo".
+        For outputs the "io.Model.Output" should be used, for inputs the "io.Model.Input" can be used.
+        The type can be a "Combo" - this will be a list for selection.
         """
-        return {
-            "required": {
-                "image": ("IMAGE",),
-                "int_field": ("INT", {
-                    "default": 0,
-                    "min": 0, #Minimum value
-                    "max": 4096, #Maximum value
-                    "step": 64, #Slider's step
-                    "display": "number", # Cosmetic only: display as "number" or "slider"
-                    "lazy": True # Will only be evaluated if check_lazy_status requires it
-                }),
-                "float_field": ("FLOAT", {
-                    "default": 1.0,
-                    "min": 0.0,
-                    "max": 10.0,
-                    "step": 0.01,
-                    "round": 0.001, #The value representing the precision to round to, will be set to the step value by default. Can be set to False to disable rounding.
-                    "display": "number",
-                    "lazy": True
-                }),
-                "print_to_screen": (["enable", "disable"],),
-                "string_field": ("STRING", {
-                    "multiline": False, #True if you want the field to look like the one on the ClipTextEncode node
-                    "default": "Hello World!",
-                    "lazy": True
-                }),
-            },
-        }
+        return io.Schema(
+            node_id="Example",
+            display_name="Example Node",
+            category="Example",
+            inputs=[
+                io.Image.Input("image"),
+                io.Int.Input(
+                    "int_field",
+                    min=0,
+                    max=4096,
+                    step=64, # Slider's step
+                    display_mode=io.NumberDisplay.number, # Cosmetic only: display as "number" or "slider"
+                    lazy=True, # Will only be evaluated if check_lazy_status requires it
+                ),
+                io.Float.Input(
+                    "float_field",
+                    default=1.0,
+                    min=0.0,
+                    max=10.0,
+                    step=0.01,
+                    round=0.001, #The value representing the precision to round to, will be set to the step value by default. Can be set to False to disable rounding.
+                    display_mode=io.NumberDisplay.number,
+                    lazy=True,
+                ),
+                io.Combo.Input("print_to_screen", options=["enable", "disable"]),
+                io.String.Input(
+                    "string_field",
+                    multiline=False, # True if you want the field to look like the one on the ClipTextEncode node
+                    default="Hello world!",
+                    lazy=True,
+                ),
+            ],
+            outputs=[
+                io.Image.Output(),
+            ],
+        )
 
-    RETURN_TYPES = ("IMAGE",)
-    #RETURN_NAMES = ("image_output_name",)
-
-    FUNCTION = "test"
-
-    #OUTPUT_NODE = False
-
-    CATEGORY = "Example"
-
-    def check_lazy_status(self, image, string_field, int_field, float_field, print_to_screen):
+    @classmethod
+    def check_lazy_status(cls, image, string_field, int_field, float_field, print_to_screen):
         """
         Return a list of input names that need to be evaluated.
 
@@ -107,7 +81,8 @@ class Example:
         else:
             return []
 
-    def test(self, image, string_field, int_field, float_field, print_to_screen):
+    @classmethod
+    def execute(cls, image, string_field, int_field, float_field, print_to_screen) -> io.NodeOutput:
         if print_to_screen == "enable":
             print(f"""Your input contains:
                 string_field aka input text: {string_field}
@@ -116,7 +91,7 @@ class Example:
             """)
         #do some processing on the image, in this example I just invert it
         image = 1.0 - image
-        return (image,)
+        return io.NodeOutput(image)
 
     """
         The node will always be re executed if any of the inputs change but
@@ -127,7 +102,7 @@ class Example:
         changes between executions the LoadImage node is executed again.
     """
     #@classmethod
-    #def IS_CHANGED(s, image, string_field, int_field, float_field, print_to_screen):
+    #def fingerprint_inputs(s, image, string_field, int_field, float_field, print_to_screen):
    #    return ""
 
 # Set the web directory, any .js file in that directory will be loaded by the frontend as a frontend extension
@@ -143,13 +118,13 @@ async def get_hello(request):
     return web.json_response("hello")
 
 
-# A dictionary that contains all nodes you want to export with their names
-# NOTE: names should be globally unique
-NODE_CLASS_MAPPINGS = {
-    "Example": Example
-}
+class ExampleExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            Example,
+        ]
 
-# A dictionary that contains the friendly/humanly readable titles for the nodes
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "Example": "Example Node"
-}
+
+async def comfy_entrypoint() -> ExampleExtension:  # ComfyUI calls this to load your extension and its nodes.
+    return ExampleExtension()
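check_lazy_status is the one piece of the example whose control flow is easy to miss: inputs declared lazy=True arrive as None until the method requests them by name, and the method is called again after each batch of requests. A pure-Python sketch of that loop with a stand-in node and executor (hypothetical, not ComfyUI's real scheduler):

class FakeNode:
    @classmethod
    def check_lazy_status(cls, image=None, text=None):
        # Ask for every lazy input that has not been evaluated yet.
        return [name for name, value in {"image": image, "text": text}.items() if value is None]

    @classmethod
    def execute(cls, image, text):
        return (image, text)


available = {"image": None, "text": None}
needed = FakeNode.check_lazy_status(**available)
while needed:
    for name in needed:
        available[name] = f"<evaluated {name}>"  # stand-in for running the upstream node
    needed = FakeNode.check_lazy_status(**available)
print(FakeNode.execute(**available))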
main.py
@@ -115,6 +115,7 @@ if os.name == "nt":
     os.environ['MIMALLOC_PURGE_DELAY'] = '0'
 
 if __name__ == "__main__":
+    os.environ['TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL'] = '1'
     if args.default_device is not None:
         default_dev = args.default_device
         devices = list(range(32))
nodes.py
@@ -2297,6 +2297,7 @@ async def init_builtin_extra_nodes():
         "nodes_gits.py",
         "nodes_controlnet.py",
         "nodes_hunyuan.py",
+        "nodes_eps.py",
         "nodes_flux.py",
         "nodes_lora_extract.py",
         "nodes_torch_compile.py",
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.62"
+version = "0.3.63"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
@@ -22,3 +22,49 @@ lint.select = [
     "F",
 ]
 exclude = ["*.ipynb", "**/generated/*.pyi"]
+
+[tool.pylint]
+master.py-version = "3.9"
+master.extension-pkg-allow-list = [
+    "pydantic",
+]
+reports.output-format = "colorized"
+similarities.ignore-imports = "yes"
+messages_control.disable = [
+    "missing-module-docstring",
+    "missing-class-docstring",
+    "missing-function-docstring",
+    "line-too-long",
+    "too-few-public-methods",
+    "too-many-public-methods",
+    "too-many-instance-attributes",
+    "too-many-positional-arguments",
+    "broad-exception-raised",
+    "too-many-lines",
+    "invalid-name",
+    "unused-argument",
+    "broad-exception-caught",
+    "consider-using-with",
+    "fixme",
+    "too-many-statements",
+    "too-many-branches",
+    "too-many-locals",
+    "too-many-arguments",
+    "duplicate-code",
+    "abstract-method",
+    "superfluous-parens",
+    "arguments-differ",
+    "redefined-builtin",
+    "unnecessary-lambda",
+    "dangerous-default-value",
+    "invalid-overridden-method",
+    # next warnings should be fixed in future
+    "bad-classmethod-argument",  # Class method should have 'cls' as first argument
+    "wrong-import-order",  # Standard imports should be placed before third party imports
+    "logging-fstring-interpolation",  # Use lazy % formatting in logging functions
+    "ungrouped-imports",
+    "unnecessary-pass",
+    "unnecessary-lambda-assignment",
+    "no-else-return",
+    "unused-variable",
+]
requirements.txt
@@ -1,5 +1,5 @@
-comfyui-frontend-package==1.26.13
-comfyui-workflow-templates==0.1.91
+comfyui-frontend-package==1.27.7
+comfyui-workflow-templates==0.1.93
 comfyui-embedded-docs==0.2.6
 torch
 torchsde
@@ -25,6 +25,5 @@ av>=14.2.0
 #non essential dependencies:
 kornia>=0.7.1
 spandrel
-soundfile
 pydantic~=2.0
 pydantic-settings~=2.0