Mirror of https://github.com/comfyanonymous/ComfyUI.git
Commit 6b20418ad1: Merge branch 'master' into dr-support-pip-cm
@@ -23,8 +23,6 @@ class MusicDCAE(torch.nn.Module):
         else:
             self.source_sample_rate = source_sample_rate
 
-        # self.resampler = torchaudio.transforms.Resample(source_sample_rate, 44100)
-
         self.transform = transforms.Compose([
             transforms.Normalize(0.5, 0.5),
         ])
@@ -37,10 +35,6 @@ class MusicDCAE(torch.nn.Module):
         self.scale_factor = 0.1786
         self.shift_factor = -1.9091
 
-    def load_audio(self, audio_path):
-        audio, sr = torchaudio.load(audio_path)
-        return audio, sr
-
     def forward_mel(self, audios):
         mels = []
         for i in range(len(audios)):
@@ -73,10 +67,8 @@ class MusicDCAE(torch.nn.Module):
             latent = self.dcae.encoder(mel.unsqueeze(0))
             latents.append(latent)
         latents = torch.cat(latents, dim=0)
-        # latent_lengths = (audio_lengths / sr * 44100 / 512 / self.time_dimention_multiple).long()
         latents = (latents - self.shift_factor) * self.scale_factor
         return latents
-        # return latents, latent_lengths
 
     @torch.no_grad()
     def decode(self, latents, audio_lengths=None, sr=None):
@@ -91,9 +83,7 @@ class MusicDCAE(torch.nn.Module):
             wav = self.vocoder.decode(mels[0]).squeeze(1)
 
             if sr is not None:
-                # resampler = torchaudio.transforms.Resample(44100, sr).to(latents.device).to(latents.dtype)
                 wav = torchaudio.functional.resample(wav, 44100, sr)
-                # wav = resampler(wav)
             else:
                 sr = 44100
             pred_wavs.append(wav)
@@ -101,7 +91,6 @@ class MusicDCAE(torch.nn.Module):
         if audio_lengths is not None:
             pred_wavs = [wav[:, :length].cpu() for wav, length in zip(pred_wavs, audio_lengths)]
         return torch.stack(pred_wavs)
-        # return sr, pred_wavs
 
     def forward(self, audios, audio_lengths=None, sr=None):
         latents, latent_lengths = self.encode(audios=audios, audio_lengths=audio_lengths, sr=sr)

@@ -365,8 +365,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["patch_size"] = 2
         dit_config["in_channels"] = 16
         dit_config["dim"] = 2304
-        dit_config["cap_feat_dim"] = 2304
-        dit_config["n_layers"] = 26
+        dit_config["cap_feat_dim"] = state_dict['{}cap_embedder.1.weight'.format(key_prefix)].shape[1]
+        dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
         dit_config["n_heads"] = 24
         dit_config["n_kv_heads"] = 8
         dit_config["qk_norm"] = True

@@ -890,6 +890,7 @@ class TEModel(Enum):
     QWEN25_3B = 10
     QWEN25_7B = 11
     BYT5_SMALL_GLYPH = 12
+    GEMMA_3_4B = 13
 
 def detect_te_model(sd):
     if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
@@ -912,6 +913,8 @@ def detect_te_model(sd):
             return TEModel.BYT5_SMALL_GLYPH
         return TEModel.T5_BASE
     if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
+        if 'model.layers.0.self_attn.q_norm.weight' in sd:
+            return TEModel.GEMMA_3_4B
         return TEModel.GEMMA_2_2B
     if 'model.layers.0.self_attn.k_proj.bias' in sd:
        weight = sd['model.layers.0.self_attn.k_proj.bias']
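
Note: the detection above keys off which parameter names are present in the checkpoint. Gemma 2 and Gemma 3 both carry post_feedforward_layernorm weights; Gemma 3 is distinguished by the extra per-head q_norm weight. A minimal sketch of the same idea, assuming sd is a plain dict keyed by tensor name (the helper name below is illustrative, not part of the codebase):

    def guess_gemma_variant(sd: dict) -> str:
        # Both Gemma 2 and Gemma 3 have pre/post feed-forward layer norms.
        if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
            # Gemma 3 additionally normalizes queries and keys per attention head.
            if 'model.layers.0.self_attn.q_norm.weight' in sd:
                return "gemma3_4b"
            return "gemma2_2b"
        return "unknown"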
@@ -1016,6 +1019,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer
            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
+        elif te_model == TEModel.GEMMA_3_4B:
+            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b")
+            clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer
+            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
         elif te_model == TEModel.LLAMA3_8:
             clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data),
                     clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None, t5xxl_scaled_fp8=None)

@@ -3,6 +3,7 @@ import torch.nn as nn
 from dataclasses import dataclass
 from typing import Optional, Any
 import math
+import logging
 
 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
@@ -28,6 +29,9 @@ class Llama2Config:
     mlp_activation = "silu"
     qkv_bias = False
     rope_dims = None
+    q_norm = None
+    k_norm = None
+    rope_scale = None
 
 @dataclass
 class Qwen25_3BConfig:
@@ -46,6 +50,9 @@ class Qwen25_3BConfig:
     mlp_activation = "silu"
     qkv_bias = True
     rope_dims = None
+    q_norm = None
+    k_norm = None
+    rope_scale = None
 
 @dataclass
 class Qwen25_7BVLI_Config:
@@ -64,6 +71,9 @@ class Qwen25_7BVLI_Config:
     mlp_activation = "silu"
     qkv_bias = True
     rope_dims = [16, 24, 24]
+    q_norm = None
+    k_norm = None
+    rope_scale = None
 
 @dataclass
 class Gemma2_2B_Config:
@@ -82,6 +92,32 @@ class Gemma2_2B_Config:
     mlp_activation = "gelu_pytorch_tanh"
     qkv_bias = False
     rope_dims = None
+    q_norm = None
+    k_norm = None
+    sliding_attention = None
+    rope_scale = None
+
+@dataclass
+class Gemma3_4B_Config:
+    vocab_size: int = 262208
+    hidden_size: int = 2560
+    intermediate_size: int = 10240
+    num_hidden_layers: int = 34
+    num_attention_heads: int = 8
+    num_key_value_heads: int = 4
+    max_position_embeddings: int = 131072
+    rms_norm_eps: float = 1e-6
+    rope_theta = [10000.0, 1000000.0]
+    transformer_type: str = "gemma3"
+    head_dim = 256
+    rms_norm_add = True
+    mlp_activation = "gelu_pytorch_tanh"
+    qkv_bias = False
+    rope_dims = None
+    q_norm = "gemma3"
+    k_norm = "gemma3"
+    sliding_attention = [False, False, False, False, False, 1024]
+    rope_scale = [1.0, 8.0]
 
 class RMSNorm(nn.Module):
     def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
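
Note on the config style above: only the annotated attributes (vocab_size, hidden_size, ...) are real dataclass fields that Gemma3_4B_Config(**config_dict) can override; unannotated ones such as rope_theta or head_dim are plain class attributes shared by every instance. A self-contained sketch of that Python behaviour (names here are illustrative only):

    from dataclasses import dataclass, fields

    @dataclass
    class Example:
        hidden_size: int = 2560            # annotated -> real dataclass field
        rope_theta = [10000.0, 1000000.0]  # no annotation -> class attribute only

    print([f.name for f in fields(Example)])  # ['hidden_size']
    Example(hidden_size=4096)                 # accepted
    # Example(rope_theta=[1.0]) would raise TypeError: unexpected keyword argument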
@@ -106,9 +142,20 @@ def rotate_half(x):
     return torch.cat((-x2, x1), dim=-1)
 
 
-def precompute_freqs_cis(head_dim, position_ids, theta, rope_dims=None, device=None):
+def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None):
+    if not isinstance(theta, list):
+        theta = [theta]
+
+    out = []
+    for index, t in enumerate(theta):
         theta_numerator = torch.arange(0, head_dim, 2, device=device).float()
-    inv_freq = 1.0 / (theta ** (theta_numerator / head_dim))
+        inv_freq = 1.0 / (t ** (theta_numerator / head_dim))
+
+        if rope_scale is not None:
+            if isinstance(rope_scale, list):
+                inv_freq /= rope_scale[index]
+            else:
+                inv_freq /= rope_scale
 
         inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
         position_ids_expanded = position_ids[:, None, :].float()
@@ -123,8 +170,12 @@ def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None):
         else:
             cos = cos.unsqueeze(1)
             sin = sin.unsqueeze(1)
+        out.append((cos, sin))
 
-    return (cos, sin)
+    if len(out) == 1:
+        return out[0]
+
+    return out
 
 
 def apply_rope(xq, xk, freqs_cis):
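
With this change the function accepts either a single theta or a list: a single value still yields one (cos, sin) pair, while a list (as in the Gemma 3 config above) yields one pair per theta, with rope_scale applied per entry. A rough usage sketch, assuming the function as reconstructed above is in scope:

    import torch

    position_ids = torch.arange(0, 128).unsqueeze(0)  # (batch=1, seq=128)

    # Llama-style call: one theta, one (cos, sin) pair.
    cos_sin = precompute_freqs_cis(128, position_ids, 500000.0)

    # Gemma 3-style call: two frequency bases plus per-theta scaling.
    pairs = precompute_freqs_cis(256, position_ids,
                                 theta=[10000.0, 1000000.0],
                                 rope_scale=[1.0, 8.0])
    # pairs[0] comes from theta=10000.0 / scale 1.0, pairs[1] from theta=1000000.0 / scale 8.0;
    # TransformerBlockGemma2 below picks pairs[1] for sliding-attention layers, pairs[0] otherwise.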
@@ -152,6 +203,14 @@ class Attention(nn.Module):
         self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
         self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype)
 
+        self.q_norm = None
+        self.k_norm = None
+
+        if config.q_norm == "gemma3":
+            self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+        if config.k_norm == "gemma3":
+            self.k_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+
     def forward(
         self,
         hidden_states: torch.Tensor,
@@ -168,6 +227,11 @@ class Attention(nn.Module):
         xk = xk.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2)
         xv = xv.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2)
 
+        if self.q_norm is not None:
+            xq = self.q_norm(xq)
+        if self.k_norm is not None:
+            xk = self.k_norm(xk)
+
         xq, xk = apply_rope(xq, xk, freqs_cis=freqs_cis)
 
         xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)
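
The q_norm/k_norm additions normalize queries and keys over the head dimension before RoPE is applied (Gemma 3's QK-norm). A standalone sketch of the shape involved, using a generic RMS norm rather than the module above:

    import torch

    batch, heads, seq, head_dim = 1, 8, 16, 256
    xq = torch.randn(batch, heads, seq, head_dim)

    def rms_norm(x, eps=1e-6):
        # Normalize over the last (head_dim) axis only.
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)

    xq = rms_norm(xq)  # same shape; each head's query vector is rescaled independently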
@@ -192,7 +256,7 @@ class MLP(nn.Module):
         return self.down_proj(self.activation(self.gate_proj(x)) * self.up_proj(x))
 
 class TransformerBlock(nn.Module):
-    def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None):
+    def __init__(self, config: Llama2Config, index, device=None, dtype=None, ops: Any = None):
         super().__init__()
         self.self_attn = Attention(config, device=device, dtype=dtype, ops=ops)
         self.mlp = MLP(config, device=device, dtype=dtype, ops=ops)
@@ -226,7 +290,7 @@ class TransformerBlock(nn.Module):
         return x
 
 class TransformerBlockGemma2(nn.Module):
-    def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None):
+    def __init__(self, config: Llama2Config, index, device=None, dtype=None, ops: Any = None):
         super().__init__()
         self.self_attn = Attention(config, device=device, dtype=dtype, ops=ops)
         self.mlp = MLP(config, device=device, dtype=dtype, ops=ops)
@@ -235,6 +299,13 @@ class TransformerBlockGemma2(nn.Module):
         self.pre_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
         self.post_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+
+        if config.sliding_attention is not None:  # TODO: implement. (Not that necessary since models are trained on less than 1024 tokens)
+            self.sliding_attention = config.sliding_attention[index % len(config.sliding_attention)]
+        else:
+            self.sliding_attention = False
+
+        self.transformer_type = config.transformer_type
 
     def forward(
         self,
         x: torch.Tensor,
@@ -242,6 +313,14 @@ class TransformerBlockGemma2(nn.Module):
         freqs_cis: Optional[torch.Tensor] = None,
         optimized_attention=None,
     ):
+        if self.transformer_type == 'gemma3':
+            if self.sliding_attention:
+                if x.shape[1] > self.sliding_attention:
+                    logging.warning("Warning: sliding attention not implemented, results may be incorrect")
+                freqs_cis = freqs_cis[1]
+            else:
+                freqs_cis = freqs_cis[0]
+
         # Self Attention
         residual = x
         x = self.input_layernorm(x)
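
The per-layer choice simply repeats the configured pattern across the model depth: layer i gets pattern[i % len(pattern)]. A small sketch with the Gemma 3 pattern from the config above:

    pattern = [False, False, False, False, False, 1024]
    num_layers = 34
    layer_windows = [pattern[i % len(pattern)] for i in range(num_layers)]
    # Every sixth layer gets a 1024-token sliding window; the warning in forward()
    # fires only when a sequence longer than that window reaches such a layer.
    print(layer_windows[:7])  # [False, False, False, False, False, 1024, False]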
@@ -276,7 +355,7 @@ class Llama2_(nn.Module):
             device=device,
             dtype=dtype
         )
-        if self.config.transformer_type == "gemma2":
+        if self.config.transformer_type == "gemma2" or self.config.transformer_type == "gemma3":
             transformer = TransformerBlockGemma2
             self.normalize_in = True
         else:
@@ -284,8 +363,8 @@ class Llama2_(nn.Module):
             self.normalize_in = False
 
         self.layers = nn.ModuleList([
-            transformer(config, device=device, dtype=dtype, ops=ops)
-            for _ in range(config.num_hidden_layers)
+            transformer(config, index=i, device=device, dtype=dtype, ops=ops)
+            for i in range(config.num_hidden_layers)
         ])
         self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
         # self.lm_head = ops.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)
@@ -305,6 +384,7 @@ class Llama2_(nn.Module):
         freqs_cis = precompute_freqs_cis(self.config.head_dim,
                                          position_ids,
                                          self.config.rope_theta,
+                                         self.config.rope_scale,
                                          self.config.rope_dims,
                                          device=x.device)
 
@@ -433,3 +513,12 @@ class Gemma2_2B(BaseLlama, torch.nn.Module):
 
         self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
         self.dtype = dtype
+
+class Gemma3_4B(BaseLlama, torch.nn.Module):
+    def __init__(self, config_dict, dtype, device, operations):
+        super().__init__()
+        config = Gemma3_4B_Config(**config_dict)
+        self.num_layers = config.num_hidden_layers
+
+        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
+        self.dtype = dtype

@@ -11,23 +11,41 @@ class Gemma2BTokenizer(sd1_clip.SDTokenizer):
     def state_dict(self):
         return {"spiece_model": self.tokenizer.serialize_model()}
 
+class Gemma3_4BTokenizer(sd1_clip.SDTokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        tokenizer = tokenizer_data.get("spiece_model", None)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
+
+    def state_dict(self):
+        return {"spiece_model": self.tokenizer.serialize_model()}
+
 class LuminaTokenizer(sd1_clip.SD1Tokenizer):
     def __init__(self, embedding_directory=None, tokenizer_data={}):
         super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma2_2b", tokenizer=Gemma2BTokenizer)
 
+class NTokenizer(sd1_clip.SD1Tokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma3_4b", tokenizer=Gemma3_4BTokenizer)
+
 class Gemma2_2BModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma2_2B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
 
+class Gemma3_4BModel(sd1_clip.SDClipModel):
+    def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
+        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
+
 class LuminaModel(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, model_options={}):
-        super().__init__(device=device, dtype=dtype, name="gemma2_2b", clip_model=Gemma2_2BModel, model_options=model_options)
+    def __init__(self, device="cpu", dtype=None, model_options={}, name="gemma2_2b", clip_model=Gemma2_2BModel):
+        super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options)
 
 
-def te(dtype_llama=None, llama_scaled_fp8=None):
+def te(dtype_llama=None, llama_scaled_fp8=None, model_type="gemma2_2b"):
+    if model_type == "gemma2_2b":
+        model = Gemma2_2BModel
+    elif model_type == "gemma3_4b":
+        model = Gemma3_4BModel
+
     class LuminaTEModel_(LuminaModel):
         def __init__(self, device="cpu", dtype=None, model_options={}):
             if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
@@ -35,5 +53,5 @@ def te(dtype_llama=None, llama_scaled_fp8=None, model_type="gemma2_2b"):
                 model_options["scaled_fp8"] = llama_scaled_fp8
             if dtype_llama is not None:
                 dtype = dtype_llama
-            super().__init__(device=device, dtype=dtype, model_options=model_options)
+            super().__init__(device=device, dtype=dtype, name=model_type, model_options=model_options, clip_model=model)
     return LuminaTEModel_
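
After this change te() acts as a small factory: model_type selects both the CLIP wrapper class and the tokenizer/encoder name, so Lumina-style pipelines can build either text encoder with the same call shape. A hedged usage sketch (the module path is as used elsewhere in the diff; the call sites are illustrative):

    import comfy.text_encoders.lumina2 as lumina2

    # Gemma 2 2B (default) and Gemma 3 4B text encoder classes from the same factory.
    Gemma2TE = lumina2.te(dtype_llama=None, llama_scaled_fp8=None)
    Gemma3TE = lumina2.te(dtype_llama=None, llama_scaled_fp8=None, model_type="gemma3_4b")
    # Each returns the LuminaTEModel_ class, whose __init__ wires name/clip_model accordingly.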

@@ -152,7 +152,7 @@ def validate_aspect_ratio(
         raise TypeError(
             f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
         )
-    elif calculated_ratio > maximum_ratio:
+    if calculated_ratio > maximum_ratio:
         raise TypeError(
             f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
         )
(File diff suppressed because it is too large.)

@@ -473,7 +473,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
             height=width_height["height"],
             use_negative_prompts=True,
         )
-        """Upload image to comfy backend to have a URL available for further processing"""
+
         # Get MIME type from tensor - assuming PNG format for image tensors
         mime_type = "image/png"
 
@@ -591,7 +591,6 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
         validated_video = validate_video_to_video_input(video)
         video_url = await upload_video_to_comfyapi(validated_video, auth_kwargs=auth)
 
-        """Validate prompts and inference input"""
         validate_prompts(prompt, negative_prompt)
 
         # Only include motion_intensity for Motion Transfer

@@ -5,14 +5,16 @@ Pika API docs: https://pika-827374fb.mintlify.app/api-reference
 """
 from __future__ import annotations
 
-import io
+from io import BytesIO
 import logging
 from typing import Optional, TypeVar
+from enum import Enum
 
 import numpy as np
 import torch
 
-from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io as comfy_io
 from comfy_api.input_impl import VideoFromFile
 from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput
 from comfy_api_nodes.apinode_utils import (
@@ -20,7 +22,6 @@ from comfy_api_nodes.apinode_utils import (
     tensor_to_bytesio,
 )
 from comfy_api_nodes.apis import (
-    IngredientsMode,
     PikaBodyGenerate22C2vGenerate22PikascenesPost,
     PikaBodyGenerate22I2vGenerate22I2vPost,
     PikaBodyGenerate22KeyframeGenerate22PikaframesPost,
@@ -28,10 +29,7 @@ from comfy_api_nodes.apis import (
     PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
     PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
     PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-    PikaDurationEnum,
-    Pikaffect,
     PikaGenerateResponse,
-    PikaResolutionEnum,
     PikaVideoResponse,
 )
 from comfy_api_nodes.apis.client import (
@@ -41,7 +39,6 @@ from comfy_api_nodes.apis.client import (
     PollingOperation,
     SynchronousOperation,
 )
-from comfy_api_nodes.mapper_utils import model_field_to_node_input
 
 R = TypeVar("R")
 
@@ -58,6 +55,35 @@ PATH_PIKASCENES = f"/proxy/pika/generate/{PIKA_API_VERSION}/pikascenes"
 PATH_VIDEO_GET = "/proxy/pika/videos"
 
 
+class PikaDurationEnum(int, Enum):
+    integer_5 = 5
+    integer_10 = 10
+
+
+class PikaResolutionEnum(str, Enum):
+    field_1080p = "1080p"
+    field_720p = "720p"
+
+
+class Pikaffect(str, Enum):
+    Cake_ify = "Cake-ify"
+    Crumble = "Crumble"
+    Crush = "Crush"
+    Decapitate = "Decapitate"
+    Deflate = "Deflate"
+    Dissolve = "Dissolve"
+    Explode = "Explode"
+    Eye_pop = "Eye-pop"
+    Inflate = "Inflate"
+    Levitate = "Levitate"
+    Melt = "Melt"
+    Peel = "Peel"
+    Poke = "Poke"
+    Squish = "Squish"
+    Ta_da = "Ta-da"
+    Tear = "Tear"
+
+
 class PikaApiError(Exception):
     """Exception for Pika API errors."""
 
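
These locally defined enums replace the imports dropped above; the node schemas later in the file turn them into combo options by value. A minimal sketch of that pattern, assuming the three enums as defined in this hunk:

    resolution_options = [r.value for r in PikaResolutionEnum]  # ["1080p", "720p"]
    duration_options = [d.value for d in PikaDurationEnum]      # [5, 10]
    pikaffect_options = [p.value for p in Pikaffect]            # ["Cake-ify", "Crumble", ...]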
@@ -74,60 +100,11 @@ def is_valid_initial_response(response: PikaGenerateResponse) -> bool:
     return hasattr(response, "video_id") and response.video_id is not None
 
 
-class PikaNodeBase(ComfyNodeABC):
-    """Base class for Pika nodes."""
-
-    @classmethod
-    def get_base_inputs_types(
-        cls, request_model
-    ) -> dict[str, tuple[IO, InputTypeOptions]]:
-        """Get the base required inputs types common to all Pika nodes."""
-        return {
-            "prompt_text": model_field_to_node_input(
-                IO.STRING,
-                request_model,
-                "promptText",
-                multiline=True,
-            ),
-            "negative_prompt": model_field_to_node_input(
-                IO.STRING,
-                request_model,
-                "negativePrompt",
-                multiline=True,
-            ),
-            "seed": model_field_to_node_input(
-                IO.INT,
-                request_model,
-                "seed",
-                min=0,
-                max=0xFFFFFFFF,
-                control_after_generate=True,
-            ),
-            "resolution": model_field_to_node_input(
-                IO.COMBO,
-                request_model,
-                "resolution",
-                enum_type=PikaResolutionEnum,
-            ),
-            "duration": model_field_to_node_input(
-                IO.COMBO,
-                request_model,
-                "duration",
-                enum_type=PikaDurationEnum,
-            ),
-        }
-
-    CATEGORY = "api node/video/Pika"
-    API_NODE = True
-    FUNCTION = "api_call"
-    RETURN_TYPES = ("VIDEO",)
-
-    async def poll_for_task_status(
-        self,
+async def poll_for_task_status(
     task_id: str,
     auth_kwargs: Optional[dict[str, str]] = None,
     node_id: Optional[str] = None,
 ) -> PikaGenerateResponse:
     polling_operation = PollingOperation(
         poll_endpoint=ApiEndpoint(
             path=f"{PATH_VIDEO_GET}/{task_id}",
@@ -154,12 +131,12 @@ class PikaNodeBase(ComfyNodeABC):
     )
     return await polling_operation.execute()
 
-    async def execute_task(
-        self,
+async def execute_task(
     initial_operation: SynchronousOperation[R, PikaGenerateResponse],
     auth_kwargs: Optional[dict[str, str]] = None,
     node_id: Optional[str] = None,
 ) -> tuple[VideoFromFile]:
     """Executes the initial operation then polls for the task status until it is completed.
 
     Args:
@@ -176,7 +153,7 @@ class PikaNodeBase(ComfyNodeABC):
         raise PikaApiError(error_msg)
 
     task_id = initial_response.video_id
-    final_response = await self.poll_for_task_status(task_id, auth_kwargs)
+    final_response = await poll_for_task_status(task_id, auth_kwargs, node_id=node_id)
     if not is_valid_video_response(final_response):
         error_msg = (
             f"Pika task {task_id} succeeded but no video data found in response."
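
With the base class gone, the request/poll/download pipeline lives in two module-level coroutines: each node's execute() builds a SynchronousOperation and hands it to execute_task, which fires the initial request, polls the video status endpoint, and downloads the finished clip. A condensed sketch of that control flow under the names visible in the diff; the exact URL field on the final response is not shown in this hunk and is marked as hypothetical:

    async def run_pika_operation(initial_operation, auth, node_id=None):
        # 1. Submit the generation request.
        initial_response = await initial_operation.execute()
        if not is_valid_initial_response(initial_response):
            raise PikaApiError("Pika initial request failed")
        # 2. Poll /proxy/pika/videos/{task_id} until the task reports a video.
        final_response = await poll_for_task_status(initial_response.video_id, auth, node_id=node_id)
        if not is_valid_video_response(final_response):
            raise PikaApiError("Pika task finished without video data")
        # 3. Download the finished clip as a VIDEO output (field name hypothetical).
        video_url = final_response.url
        return (await download_url_to_video_output(video_url),)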
@@ -190,39 +167,54 @@ class PikaNodeBase(ComfyNodeABC):
     return (await download_url_to_video_output(video_url),)
 
 
-class PikaImageToVideoV2_2(PikaNodeBase):
+def get_base_inputs_types() -> list[comfy_io.Input]:
+    """Get the base required inputs types common to all Pika nodes."""
+    return [
+        comfy_io.String.Input("prompt_text", multiline=True),
+        comfy_io.String.Input("negative_prompt", multiline=True),
+        comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
+        comfy_io.Combo.Input(
+            "resolution", options=[resolution.value for resolution in PikaResolutionEnum], default="1080p"
+        ),
+        comfy_io.Combo.Input(
+            "duration", options=[duration.value for duration in PikaDurationEnum], default=5
+        ),
+    ]
+
+
+class PikaImageToVideoV2_2(comfy_io.ComfyNode):
     """Pika 2.2 Image to Video Node."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image": (
-                    IO.IMAGE,
-                    {"tooltip": "The image to convert to video"},
-                ),
-                **cls.get_base_inputs_types(PikaBodyGenerate22I2vGenerate22I2vPost),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    DESCRIPTION = "Sends an image and prompt to the Pika API v2.2 to generate a video."
-
-    async def api_call(
-        self,
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PikaImageToVideoNode2_2",
+            display_name="Pika Image to Video",
+            description="Sends an image and prompt to the Pika API v2.2 to generate a video.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Image.Input("image", tooltip="The image to convert to video"),
+                *get_base_inputs_types(),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
         image: torch.Tensor,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
         resolution: str,
         duration: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
         # Convert image to BytesIO
         image_bytes_io = tensor_to_bytesio(image)
         image_bytes_io.seek(0)
@@ -237,7 +229,10 @@ class PikaImageToVideoV2_2(PikaNodeBase):
             resolution=resolution,
             duration=duration,
         )
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_IMAGE_TO_VIDEO,
@@ -248,50 +243,55 @@ class PikaImageToVideoV2_2(PikaNodeBase):
             request=pika_request_data,
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikaTextToVideoNodeV2_2(PikaNodeBase):
+class PikaTextToVideoNodeV2_2(comfy_io.ComfyNode):
     """Pika Text2Video v2.2 Node."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                **cls.get_base_inputs_types(PikaBodyGenerate22T2vGenerate22T2vPost),
-                "aspect_ratio": model_field_to_node_input(
-                    IO.FLOAT,
-                    PikaBodyGenerate22T2vGenerate22T2vPost,
-                    "aspectRatio",
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PikaTextToVideoNode2_2",
+            display_name="Pika Text to Video",
+            description="Sends a text prompt to the Pika API v2.2 to generate a video.",
+            category="api node/video/Pika",
+            inputs=[
+                *get_base_inputs_types(),
+                comfy_io.Float.Input(
+                    "aspect_ratio",
                     step=0.001,
                     min=0.4,
                     max=2.5,
                     default=1.7777777777777777,
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    DESCRIPTION = "Sends a text prompt to the Pika API v2.2 to generate a video."
-
-    async def api_call(
-        self,
+                    tooltip="Aspect ratio (width / height)",
+                )
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
         resolution: str,
         duration: int,
         aspect_ratio: float,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_TEXT_TO_VIDEO,
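
The same mechanical translation repeats for every node below: the INPUT_TYPES "required"/"hidden" dicts become a declarative comfy_io.Schema, api_call(self, ..., **kwargs) becomes a classmethod execute(cls, ...), and the auth tokens move from **kwargs to the schema's hidden inputs. A small runnable sketch of the recurring auth-dict construction, assuming an object exposing the hidden attributes as in the diff (the helper name is illustrative):

    def build_auth(hidden) -> dict:
        # Mirrors the auth dict each node now builds from its hidden inputs
        # before passing it to SynchronousOperation / execute_task.
        return {
            "auth_token": hidden.auth_token_comfy_org,
            "comfy_api_key": hidden.api_key_comfy_org,
        }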
@@ -307,62 +307,75 @@ class PikaTextToVideoNodeV2_2(PikaNodeBase):
                 duration=duration,
                 aspectRatio=aspect_ratio,
             ),
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
             content_type="application/x-www-form-urlencoded",
         )
-
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikaScenesV2_2(PikaNodeBase):
+class PikaScenesV2_2(comfy_io.ComfyNode):
     """PikaScenes v2.2 Node."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        image_ingredient_input = (
-            IO.IMAGE,
-            {"tooltip": "Image that will be used as ingredient to create a video."},
-        )
-        return {
-            "required": {
-                **cls.get_base_inputs_types(
-                    PikaBodyGenerate22C2vGenerate22PikascenesPost,
-                ),
-                "ingredients_mode": model_field_to_node_input(
-                    IO.COMBO,
-                    PikaBodyGenerate22C2vGenerate22PikascenesPost,
-                    "ingredientsMode",
-                    enum_type=IngredientsMode,
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PikaScenesV2_2",
+            display_name="Pika Scenes (Video Image Composition)",
+            description="Combine your images to create a video with the objects in them. Upload multiple images as ingredients and generate a high-quality video that incorporates all of them.",
+            category="api node/video/Pika",
+            inputs=[
+                *get_base_inputs_types(),
+                comfy_io.Combo.Input(
+                    "ingredients_mode",
+                    options=["creative", "precise"],
                     default="creative",
                 ),
-                "aspect_ratio": model_field_to_node_input(
-                    IO.FLOAT,
-                    PikaBodyGenerate22C2vGenerate22PikascenesPost,
-                    "aspectRatio",
+                comfy_io.Float.Input(
+                    "aspect_ratio",
                     step=0.001,
                     min=0.4,
                     max=2.5,
                     default=1.7777777777777777,
+                    tooltip="Aspect ratio (width / height)",
                 ),
-            },
-            "optional": {
-                "image_ingredient_1": image_ingredient_input,
-                "image_ingredient_2": image_ingredient_input,
-                "image_ingredient_3": image_ingredient_input,
-                "image_ingredient_4": image_ingredient_input,
-                "image_ingredient_5": image_ingredient_input,
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+                comfy_io.Image.Input(
+                    "image_ingredient_1",
+                    optional=True,
+                    tooltip="Image that will be used as ingredient to create a video.",
+                ),
+                comfy_io.Image.Input(
+                    "image_ingredient_2",
+                    optional=True,
+                    tooltip="Image that will be used as ingredient to create a video.",
+                ),
+                comfy_io.Image.Input(
+                    "image_ingredient_3",
+                    optional=True,
+                    tooltip="Image that will be used as ingredient to create a video.",
+                ),
+                comfy_io.Image.Input(
+                    "image_ingredient_4",
+                    optional=True,
+                    tooltip="Image that will be used as ingredient to create a video.",
+                ),
+                comfy_io.Image.Input(
+                    "image_ingredient_5",
+                    optional=True,
+                    tooltip="Image that will be used as ingredient to create a video.",
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    DESCRIPTION = "Combine your images to create a video with the objects in them. Upload multiple images as ingredients and generate a high-quality video that incorporates all of them."
-
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
@@ -370,14 +383,12 @@ class PikaScenesV2_2(PikaNodeBase):
         duration: int,
         ingredients_mode: str,
         aspect_ratio: float,
-        unique_id: str,
         image_ingredient_1: Optional[torch.Tensor] = None,
         image_ingredient_2: Optional[torch.Tensor] = None,
         image_ingredient_3: Optional[torch.Tensor] = None,
         image_ingredient_4: Optional[torch.Tensor] = None,
         image_ingredient_5: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
         # Convert all passed images to BytesIO
         all_image_bytes_io = []
         for image in [
@@ -406,7 +417,10 @@ class PikaScenesV2_2(PikaNodeBase):
             duration=duration,
             aspectRatio=aspect_ratio,
         )
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKASCENES,
@@ -417,63 +431,54 @@ class PikaScenesV2_2(PikaNodeBase):
             request=pika_request_data,
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
 
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikAdditionsNode(PikaNodeBase):
+class PikAdditionsNode(comfy_io.ComfyNode):
     """Pika Pikadditions Node. Add an image into a video."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "video": (IO.VIDEO, {"tooltip": "The video to add an image to."}),
-                "image": (IO.IMAGE, {"tooltip": "The image to add to the video."}),
-                "prompt_text": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
-                    "promptText",
-                    multiline=True,
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
-                    "negativePrompt",
-                    multiline=True,
-                ),
-                "seed": model_field_to_node_input(
-                    IO.INT,
-                    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="Pikadditions",
+            display_name="Pikadditions (Video Object Insertion)",
+            description="Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Video.Input("video", tooltip="The video to add an image to."),
+                comfy_io.Image.Input("image", tooltip="The image to add to the video."),
+                comfy_io.String.Input("prompt_text", multiline=True),
+                comfy_io.String.Input("negative_prompt", multiline=True),
+                comfy_io.Int.Input(
                     "seed",
                     min=0,
                     max=0xFFFFFFFF,
                     control_after_generate=True,
                 ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result."
-
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         video: VideoInput,
         image: torch.Tensor,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
         # Convert video to BytesIO
-        video_bytes_io = io.BytesIO()
+        video_bytes_io = BytesIO()
         video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
         video_bytes_io.seek(0)
 
@@ -492,7 +497,10 @@ class PikAdditionsNode(PikaNodeBase):
             negativePrompt=negative_prompt,
             seed=seed,
         )
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKADDITIONS,
@@ -503,74 +511,51 @@ class PikAdditionsNode(PikaNodeBase):
             request=pika_request_data,
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
 
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikaSwapsNode(PikaNodeBase):
+class PikaSwapsNode(comfy_io.ComfyNode):
     """Pika Pikaswaps Node."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "video": (IO.VIDEO, {"tooltip": "The video to swap an object in."}),
-                "image": (
-                    IO.IMAGE,
-                    {
-                        "tooltip": "The image used to replace the masked object in the video."
-                    },
-                ),
-                "mask": (
-                    IO.MASK,
-                    {"tooltip": "Use the mask to define areas in the video to replace"},
-                ),
-                "prompt_text": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-                    "promptText",
-                    multiline=True,
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-                    "negativePrompt",
-                    multiline=True,
-                ),
-                "seed": model_field_to_node_input(
-                    IO.INT,
-                    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-                    "seed",
-                    min=0,
-                    max=0xFFFFFFFF,
-                    control_after_generate=True,
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    DESCRIPTION = "Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates."
-    RETURN_TYPES = ("VIDEO",)
-
-    async def api_call(
-        self,
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="Pikaswaps",
+            display_name="Pika Swaps (Video Object Replacement)",
+            description="Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Video.Input("video", tooltip="The video to swap an object in."),
+                comfy_io.Image.Input("image", tooltip="The image used to replace the masked object in the video."),
+                comfy_io.Mask.Input("mask", tooltip="Use the mask to define areas in the video to replace"),
+                comfy_io.String.Input("prompt_text", multiline=True),
+                comfy_io.String.Input("negative_prompt", multiline=True),
+                comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
         video: VideoInput,
         image: torch.Tensor,
         mask: torch.Tensor,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
         # Convert video to BytesIO
-        video_bytes_io = io.BytesIO()
+        video_bytes_io = BytesIO()
         video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
         video_bytes_io.seek(0)
 
@@ -579,7 +564,7 @@ class PikaSwapsNode(PikaNodeBase):
         mask = mask.repeat(1, 3, 1, 1)
 
         # Convert 3-channel binary mask to BytesIO
-        mask_bytes_io = io.BytesIO()
+        mask_bytes_io = BytesIO()
         mask_bytes_io.write(mask.numpy().astype(np.uint8))
         mask_bytes_io.seek(0)
 
@@ -599,7 +584,10 @@ class PikaSwapsNode(PikaNodeBase):
             negativePrompt=negative_prompt,
             seed=seed,
         )
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKADDITIONS,
@@ -610,71 +598,52 @@ class PikaSwapsNode(PikaNodeBase):
             request=pika_request_data,
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)


-class PikaffectsNode(PikaNodeBase):
+class PikaffectsNode(comfy_io.ComfyNode):
     """Pika Pikaffects Node."""

     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image": (
-                    IO.IMAGE,
-                    {"tooltip": "The reference image to apply the Pikaffect to."},
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="Pikaffects",
+            display_name="Pikaffects (Video Effects)",
+            description="Generate a video with a specific Pikaffect. Supported Pikaffects: Cake-ify, Crumble, Crush, Decapitate, Deflate, Dissolve, Explode, Eye-pop, Inflate, Levitate, Melt, Peel, Poke, Squish, Ta-da, Tear",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Image.Input("image", tooltip="The reference image to apply the Pikaffect to."),
+                comfy_io.Combo.Input(
+                    "pikaffect", options=[pikaffect.value for pikaffect in Pikaffect], default="Cake-ify"
                 ),
-                "pikaffect": model_field_to_node_input(
-                    IO.COMBO,
-                    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
-                    "pikaffect",
-                    enum_type=Pikaffect,
-                    default="Cake-ify",
-                ),
-                "prompt_text": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
-                    "promptText",
-                    multiline=True,
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
-                    "negativePrompt",
-                    multiline=True,
-                ),
-                "seed": model_field_to_node_input(
-                    IO.INT,
-                    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
-                    "seed",
-                    min=0,
-                    max=0xFFFFFFFF,
-                    control_after_generate=True,
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+                comfy_io.String.Input("prompt_text", multiline=True),
+                comfy_io.String.Input("negative_prompt", multiline=True),
+                comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )

-    DESCRIPTION = "Generate a video with a specific Pikaffect. Supported Pikaffects: Cake-ify, Crumble, Crush, Decapitate, Deflate, Dissolve, Explode, Eye-pop, Inflate, Levitate, Melt, Peel, Poke, Squish, Ta-da, Tear"
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         image: torch.Tensor,
         pikaffect: str,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKAFFECTS,
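The block above is the core of the migration: `INPUT_TYPES`, `DESCRIPTION`, `RETURN_TYPES` and the hidden dict all collapse into a single `define_schema` classmethod returning `comfy_io.Schema`. A trimmed sketch of the pattern, assuming a ComfyUI environment where `comfy_api.latest` is importable (the node id and inputs below are illustrative, not a real node):

from comfy_api.latest import io as comfy_io  # assumed import path


class ExamplePikaStyleNode(comfy_io.ComfyNode):
    @classmethod
    def define_schema(cls) -> comfy_io.Schema:
        return comfy_io.Schema(
            node_id="ExamplePikaStyleNode",  # hypothetical id
            display_name="Example Pika-style Node",
            description="The schema now carries what INPUT_TYPES, DESCRIPTION and RETURN_TYPES used to declare.",
            category="api node/video/Pika",
            inputs=[
                comfy_io.Image.Input("image", tooltip="Reference image."),
                comfy_io.String.Input("prompt_text", multiline=True),
                comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
            ],
            outputs=[comfy_io.Video.Output()],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )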
@@ -690,36 +659,38 @@ class PikaffectsNode(PikaNodeBase):
             ),
             files={"image": ("image.png", tensor_to_bytesio(image), "image/png")},
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)


-class PikaStartEndFrameNode2_2(PikaNodeBase):
+class PikaStartEndFrameNode2_2(comfy_io.ComfyNode):
     """PikaFrames v2.2 Node."""

     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image_start": (IO.IMAGE, {"tooltip": "The first image to combine."}),
-                "image_end": (IO.IMAGE, {"tooltip": "The last image to combine."}),
-                **cls.get_base_inputs_types(
-                    PikaBodyGenerate22KeyframeGenerate22PikaframesPost
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PikaStartEndFrameNode2_2",
+            display_name="Pika Start and End Frame to Video",
+            description="Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Image.Input("image_start", tooltip="The first image to combine."),
+                comfy_io.Image.Input("image_end", tooltip="The last image to combine."),
+                *get_base_inputs_types(),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )

-    DESCRIPTION = "Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them."
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         image_start: torch.Tensor,
         image_end: torch.Tensor,
         prompt_text: str,
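PikaStartEndFrameNode2_2 pulls its shared prompt/seed/resolution/duration inputs in with `*get_base_inputs_types()`, a module-level helper that replaces the old `**cls.get_base_inputs_types(...)` dict expansion. The helper itself is outside this excerpt; as an illustration only, such a helper would return a list of input declarations that can be splatted into `inputs=[...]` (the option values and bounds below are assumptions):

from comfy_api.latest import io as comfy_io  # assumed import path


# Hypothetical sketch; the real helper lives elsewhere in this module.
def get_base_inputs_types() -> list:
    return [
        comfy_io.String.Input("prompt_text", multiline=True),
        comfy_io.String.Input("negative_prompt", multiline=True),
        comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
        comfy_io.Combo.Input("resolution", options=["1080p", "720p"]),  # assumed options
        comfy_io.Int.Input("duration", min=5, max=10),                  # assumed bounds
    ]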
@@ -727,15 +698,15 @@ class PikaStartEndFrameNode2_2(PikaNodeBase):
         seed: int,
         resolution: str,
         duration: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:

         pika_files = [
             ("keyFrames", ("image_start.png", tensor_to_bytesio(image_start), "image/png")),
             ("keyFrames", ("image_end.png", tensor_to_bytesio(image_end), "image/png")),
         ]
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKAFRAMES,
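The pika_files list uses the requests-style multipart encoding: the same field name, "keyFrames", appears once per frame, and each value is a (filename, file-object, content-type) tuple. A standalone sketch of that structure; the upload call in the final comment is illustrative and not taken from this diff:

from io import BytesIO

# In-memory PNG placeholders standing in for tensor_to_bytesio(image_start/image_end).
start_png = BytesIO(b"\x89PNG...")  # placeholder bytes, not a real image
end_png = BytesIO(b"\x89PNG...")

# Repeating the field name uploads a list of files under "keyFrames".
pika_files = [
    ("keyFrames", ("image_start.png", start_png, "image/png")),
    ("keyFrames", ("image_end.png", end_png, "image/png")),
]
# Illustrative only: requests.post(url, files=pika_files, data=payload)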
@@ -752,28 +723,24 @@ class PikaStartEndFrameNode2_2(PikaNodeBase):
             ),
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)


-NODE_CLASS_MAPPINGS = {
-    "PikaImageToVideoNode2_2": PikaImageToVideoV2_2,
-    "PikaTextToVideoNode2_2": PikaTextToVideoNodeV2_2,
-    "PikaScenesV2_2": PikaScenesV2_2,
-    "Pikadditions": PikAdditionsNode,
-    "Pikaswaps": PikaSwapsNode,
-    "Pikaffects": PikaffectsNode,
-    "PikaStartEndFrameNode2_2": PikaStartEndFrameNode2_2,
-}
+class PikaApiNodesExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
+        return [
+            PikaImageToVideoV2_2,
+            PikaTextToVideoNodeV2_2,
+            PikaScenesV2_2,
+            PikAdditionsNode,
+            PikaSwapsNode,
+            PikaffectsNode,
+            PikaStartEndFrameNode2_2,
+        ]

-NODE_DISPLAY_NAME_MAPPINGS = {
-    "PikaImageToVideoNode2_2": "Pika Image to Video",
-    "PikaTextToVideoNode2_2": "Pika Text to Video",
-    "PikaScenesV2_2": "Pika Scenes (Video Image Composition)",
-    "Pikadditions": "Pikadditions (Video Object Insertion)",
-    "Pikaswaps": "Pika Swaps (Video Object Replacement)",
-    "Pikaffects": "Pikaffects (Video Effects)",
-    "PikaStartEndFrameNode2_2": "Pika Start and End Frame to Video",
-}
+async def comfy_entrypoint() -> PikaApiNodesExtension:
+    return PikaApiNodesExtension()
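Registration changes in the same way for the whole module: the NODE_CLASS_MAPPINGS / NODE_DISPLAY_NAME_MAPPINGS dicts disappear, display names move into each node's schema (`display_name=...`), and the module instead exports a ComfyExtension subclass plus an async `comfy_entrypoint()` factory. A minimal sketch of that wiring; the import paths are assumptions consistent with the names used in this diff:

from typing_extensions import override  # assumed source of @override
from comfy_api.latest import ComfyExtension, io as comfy_io  # assumed import path


class ExampleExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        # ComfyUI calls this at load time to discover the module's nodes.
        return []  # e.g. [ExampleApiNode] in a real module


async def comfy_entrypoint() -> ExampleExtension:
    return ExampleExtension()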
@@ -146,7 +146,7 @@ class PixverseTextToVideoNode(comfy_io.ComfyNode):
                 comfy_io.String.Input(
                     "negative_prompt",
                     default="",
-                    force_input=True,
+                    multiline=True,
                     tooltip="An optional text description of undesired elements on an image.",
                     optional=True,
                 ),
@@ -284,7 +284,7 @@ class PixverseImageToVideoNode(comfy_io.ComfyNode):
                 comfy_io.String.Input(
                     "negative_prompt",
                     default="",
-                    force_input=True,
+                    multiline=True,
                     tooltip="An optional text description of undesired elements on an image.",
                     optional=True,
                 ),
@@ -425,7 +425,7 @@ class PixverseTransitionVideoNode(comfy_io.ComfyNode):
                 comfy_io.String.Input(
                     "negative_prompt",
                     default="",
-                    force_input=True,
+                    multiline=True,
                     tooltip="An optional text description of undesired elements on an image.",
                     optional=True,
                 ),
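All three Pixverse hunks make the same one-line change: `force_input=True`, which effectively hides the widget and requires negative_prompt to be connected from another node, is replaced with `multiline=True`, so the input gets an editable multi-line text box again. The resulting declaration in each class reads:

comfy_io.String.Input(
    "negative_prompt",
    default="",
    multiline=True,
    tooltip="An optional text description of undesired elements on an image.",
    optional=True,
),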
@@ -107,7 +107,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
         # if list already exists exists, just extend list with data
         for check_list in lists_to_check:
             for conv_tuple in check_list:
-                if conv_tuple[0] == parent_key and type(conv_tuple[1]) is list:
+                if conv_tuple[0] == parent_key and isinstance(conv_tuple[1], list):
                     conv_tuple[1].append(formatter(data))
                     return True
         return False
@@ -119,7 +119,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
     if formatter is None:
         formatter = lambda v: v # Multipart representation of value

-    if type(data) is not dict:
+    if not isinstance(data, dict):
         # if list already exists exists, just extend list with data
         added = handle_converted_lists(data, parent_key, converted_to_check)
         if added:
@@ -136,9 +136,9 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co

     for key, value in data.items():
         current_key = key if parent_key is None else f"{parent_key}[{key}]"
-        if type(value) is dict:
+        if isinstance(value, dict):
             converted.extend(recraft_multipart_parser(value, current_key, formatter, next_check).items())
-        elif type(value) is list:
+        elif isinstance(value, list):
             for ind, list_value in enumerate(value):
                 iter_key = f"{current_key}[]"
                 converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items())
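The parser changes above replace `type(x) is dict` / `type(x) is list` comparisons with `isinstance`, which also accepts subclasses and is what pylint's unidiomatic-typecheck warning expects (that warning is re-enabled later in this merge by dropping it from the disable list in pyproject.toml). A small self-contained illustration of the behavioural difference:

from collections import OrderedDict

payload = OrderedDict(mode="fill", colors=[1, 2, 3])

print(type(payload) is dict)      # False: OrderedDict is not exactly dict
print(isinstance(payload, dict))  # True: OrderedDict subclasses dict

print(type(True) is int)          # False
print(isinstance(True, int))      # True: bool subclasses int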
@@ -360,7 +360,7 @@ class RecordAudio:
     def load(self, audio):
         audio_path = folder_paths.get_annotated_filepath(audio)

-        waveform, sample_rate = torchaudio.load(audio_path)
+        waveform, sample_rate = load(audio_path)
         audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
         return (audio, )

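RecordAudio now reads the file through the module's own `load()` helper instead of `torchaudio.load`, but the value handed to the rest of the graph keeps the same shape: a dict with a batched waveform and a sample rate. A sketch of that dict, with a zero tensor standing in for real audio:

import torch

# ComfyUI AUDIO values are dicts; the waveform is [batch, channels, samples].
waveform = torch.zeros(2, 44100)  # hypothetical 1-second stereo clip at 44.1 kHz
sample_rate = 44100

audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
print(audio["waveform"].shape)    # torch.Size([1, 2, 44100])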
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.62"
+__version__ = "0.3.63"

@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.62"
+version = "0.3.63"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
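The version bump touches two places that must stay in sync: `__version__` in the generated version module and `version` under `[project]` in pyproject.toml (the comment in the generated file states it is produced from pyproject.toml). A quick consistency check, assuming it runs from the repository root on a Python with tomllib (3.11+):

import tomllib  # Python 3.11+; the tomli backport provides the same API on older versions

from comfyui_version import __version__  # assumes the generated module is importable from the repo root

with open("pyproject.toml", "rb") as f:
    project_version = tomllib.load(f)["project"]["version"]

assert __version__ == project_version, (__version__, project_version)
print(project_version)  # "0.3.63" after this merge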
@@ -57,18 +57,14 @@ messages_control.disable = [
     "redefined-builtin",
     "unnecessary-lambda",
     "dangerous-default-value",
+    "invalid-overridden-method",
     # next warnings should be fixed in future
     "bad-classmethod-argument", # Class method should have 'cls' as first argument
     "wrong-import-order", # Standard imports should be placed before third party imports
     "logging-fstring-interpolation", # Use lazy % formatting in logging functions
     "ungrouped-imports",
     "unnecessary-pass",
-    "unidiomatic-typecheck",
     "unnecessary-lambda-assignment",
     "no-else-return",
-    "no-else-raise",
-    "invalid-overridden-method",
     "unused-variable",
-    "pointless-string-statement",
-    "redefined-outer-name",
 ]
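This hunk tightens the pylint policy: invalid-overridden-method moves up into the permanently disabled group, while unidiomatic-typecheck, no-else-raise, pointless-string-statement and redefined-outer-name drop out of the "fix in future" list, so pylint now reports them. The recraft changes earlier in this merge are what keep unidiomatic-typecheck clean; for example, pylint would now flag the first function below and accept the second:

def describe(value):
    if type(value) is dict:      # unidiomatic-typecheck: now reported by pylint
        return "mapping"
    return "other"


def describe_fixed(value):
    if isinstance(value, dict):  # idiomatic, and also matches dict subclasses
        return "mapping"
    return "other"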
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.27.7
-comfyui-workflow-templates==0.1.91
+comfyui-workflow-templates==0.1.93
 comfyui-embedded-docs==0.2.6
 comfyui_manager==4.0.2
 torch

@@ -26,6 +26,5 @@ av>=14.2.0
 #non essential dependencies:
 kornia>=0.7.1
 spandrel
-soundfile
 pydantic~=2.0
 pydantic-settings~=2.0