mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-27 17:37:39 +08:00
Compare commits
12 Commits
3afc420ce5
...
c949fa3042
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c949fa3042 | ||
|
|
cf9cbec596 | ||
|
|
96f1cee9f5 | ||
|
|
97f58baaaf | ||
|
|
e8e8fee224 | ||
|
|
e9c311b245 | ||
|
|
e6e0936128 | ||
|
|
b633244635 | ||
|
|
38ecad8f8a | ||
|
|
a7d82baa06 | ||
|
|
d10fc2d652 | ||
|
|
a164c82913 |
@ -1,2 +1,2 @@
|
||||
# Admins
|
||||
* @comfyanonymous @kosinkadink @guill
|
||||
* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128
|
||||
|
||||
@ -224,6 +224,7 @@ class Flux2(LatentFormat):
|
||||
|
||||
self.latent_rgb_factors_bias = [-0.0329, -0.0718, -0.0851]
|
||||
self.latent_rgb_factors_reshape = lambda t: t.reshape(t.shape[0], 32, 2, 2, t.shape[-2], t.shape[-1]).permute(0, 1, 4, 2, 5, 3).reshape(t.shape[0], 32, t.shape[-2] * 2, t.shape[-1] * 2)
|
||||
self.taesd_decoder_name = "taef2_decoder"
|
||||
|
||||
def process_in(self, latent):
|
||||
return latent
|
||||
|
||||
@ -342,6 +342,12 @@ def model_lora_keys_unet(model, key_map={}):
|
||||
key_map["base_model.model.{}".format(key_lora)] = k # Official base model loras
|
||||
key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k # LyCORIS/LoKR format
|
||||
|
||||
if isinstance(model, comfy.model_base.ErnieImage):
|
||||
for k in sdk:
|
||||
if k.startswith("diffusion_model.") and k.endswith(".weight"):
|
||||
key_lora = k[len("diffusion_model."):-len(".weight")]
|
||||
key_map["transformer.{}".format(key_lora)] = k
|
||||
|
||||
return key_map
|
||||
|
||||
|
||||
|
||||
@ -479,7 +479,10 @@ class VAE:
|
||||
encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': encoder_config},
|
||||
decoder_config={'target': "comfy.ldm.modules.temporal_ae.VideoDecoder", 'params': decoder_config})
|
||||
elif "taesd_decoder.1.weight" in sd:
|
||||
self.latent_channels = sd["taesd_decoder.1.weight"].shape[1]
|
||||
if isinstance(metadata, dict) and "tae_latent_channels" in metadata:
|
||||
self.latent_channels = metadata["tae_latent_channels"]
|
||||
else:
|
||||
self.latent_channels = sd["taesd_decoder.1.weight"].shape[1]
|
||||
self.first_stage_model = comfy.taesd.taesd.TAESD(latent_channels=self.latent_channels)
|
||||
elif "vquantizer.codebook.weight" in sd: #VQGan: stage a of stable cascade
|
||||
self.first_stage_model = StageA()
|
||||
|
||||
@ -1879,6 +1879,86 @@ class CogVideoX_I2V(CogVideoX_T2V):
|
||||
out = model_base.CogVideoX(self, image_to_video=True, device=device)
|
||||
return out
|
||||
|
||||
models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4, ErnieImage, SAM3, SAM31, CogVideoX_I2V, CogVideoX_T2V]
|
||||
|
||||
models += [SVD_img2vid]
|
||||
models = [
|
||||
LotusD,
|
||||
Stable_Zero123,
|
||||
SD15_instructpix2pix,
|
||||
SD15,
|
||||
SD20,
|
||||
SD21UnclipL,
|
||||
SD21UnclipH,
|
||||
SDXL_instructpix2pix,
|
||||
SDXLRefiner,
|
||||
SDXL,
|
||||
SSD1B,
|
||||
KOALA_700M,
|
||||
KOALA_1B,
|
||||
Segmind_Vega,
|
||||
SD_X4Upscaler,
|
||||
Stable_Cascade_C,
|
||||
Stable_Cascade_B,
|
||||
SV3D_u,
|
||||
SV3D_p,
|
||||
SD3,
|
||||
StableAudio,
|
||||
AuraFlow,
|
||||
PixArtAlpha,
|
||||
PixArtSigma,
|
||||
HunyuanDiT,
|
||||
HunyuanDiT1,
|
||||
FluxInpaint,
|
||||
Flux,
|
||||
LongCatImage,
|
||||
FluxSchnell,
|
||||
GenmoMochi,
|
||||
LTXV,
|
||||
LTXAV,
|
||||
HunyuanVideo15_SR_Distilled,
|
||||
HunyuanVideo15,
|
||||
HunyuanImage21Refiner,
|
||||
HunyuanImage21,
|
||||
HunyuanVideoSkyreelsI2V,
|
||||
HunyuanVideoI2V,
|
||||
HunyuanVideo,
|
||||
CosmosT2V,
|
||||
CosmosI2V,
|
||||
CosmosT2IPredict2,
|
||||
CosmosI2VPredict2,
|
||||
ZImagePixelSpace,
|
||||
ZImage,
|
||||
Lumina2,
|
||||
WAN22_T2V,
|
||||
WAN21_T2V,
|
||||
WAN21_I2V,
|
||||
WAN21_FunControl2V,
|
||||
WAN21_Vace,
|
||||
WAN21_Camera,
|
||||
WAN22_Camera,
|
||||
WAN22_S2V,
|
||||
WAN21_HuMo,
|
||||
WAN22_Animate,
|
||||
WAN21_FlowRVS,
|
||||
WAN21_SCAIL,
|
||||
Hunyuan3Dv2mini,
|
||||
Hunyuan3Dv2,
|
||||
Hunyuan3Dv2_1,
|
||||
HiDream,
|
||||
Chroma,
|
||||
ChromaRadiance,
|
||||
ACEStep,
|
||||
ACEStep15,
|
||||
Omnigen2,
|
||||
QwenImage,
|
||||
Flux2,
|
||||
Kandinsky5Image,
|
||||
Kandinsky5,
|
||||
Anima,
|
||||
RT_DETR_v4,
|
||||
ErnieImage,
|
||||
SAM3,
|
||||
SAM31,
|
||||
CogVideoX_I2V,
|
||||
CogVideoX_T2V,
|
||||
SVD_img2vid,
|
||||
]
|
||||
|
||||
@ -17,32 +17,79 @@ class Clamp(nn.Module):
|
||||
return torch.tanh(x / 3) * 3
|
||||
|
||||
class Block(nn.Module):
|
||||
def __init__(self, n_in, n_out):
|
||||
def __init__(self, n_in: int, n_out: int, use_midblock_gn: bool = False):
|
||||
super().__init__()
|
||||
self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out))
|
||||
self.skip = comfy.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity()
|
||||
self.fuse = nn.ReLU()
|
||||
def forward(self, x):
|
||||
if not use_midblock_gn:
|
||||
self.pool = None
|
||||
return
|
||||
n_gn = n_in * 4
|
||||
self.pool = nn.Sequential(
|
||||
comfy.ops.disable_weight_init.Conv2d(n_in, n_gn, 1, bias=False),
|
||||
comfy.ops.disable_weight_init.GroupNorm(4, n_gn),
|
||||
nn.ReLU(inplace=True),
|
||||
comfy.ops.disable_weight_init.Conv2d(n_gn, n_in, 1, bias=False),
|
||||
)
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
if self.pool is not None:
|
||||
x = x + self.pool(x)
|
||||
return self.fuse(self.conv(x) + self.skip(x))
|
||||
|
||||
def Encoder(latent_channels=4):
|
||||
return nn.Sequential(
|
||||
conv(3, 64), Block(64, 64),
|
||||
conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
|
||||
conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
|
||||
conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
|
||||
conv(64, latent_channels),
|
||||
)
|
||||
class Encoder(nn.Sequential):
|
||||
def __init__(self, latent_channels: int = 4, use_gn: bool = False):
|
||||
super().__init__(
|
||||
conv(3, 64), Block(64, 64),
|
||||
conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
|
||||
conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
|
||||
conv(64, 64, stride=2, bias=False), Block(64, 64, use_gn), Block(64, 64, use_gn), Block(64, 64, use_gn),
|
||||
conv(64, latent_channels),
|
||||
)
|
||||
|
||||
class Decoder(nn.Sequential):
|
||||
def __init__(self, latent_channels: int = 4, use_gn: bool = False):
|
||||
super().__init__(
|
||||
Clamp(), conv(latent_channels, 64), nn.ReLU(),
|
||||
Block(64, 64, use_gn), Block(64, 64, use_gn), Block(64, 64, use_gn), nn.Upsample(scale_factor=2), conv(64, 64, bias=False),
|
||||
Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False),
|
||||
Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False),
|
||||
Block(64, 64), conv(64, 3),
|
||||
)
|
||||
|
||||
class DecoderFlux2(Decoder):
|
||||
def __init__(self, latent_channels: int = 128, use_gn: bool = True):
|
||||
if latent_channels != 128 or not use_gn:
|
||||
raise ValueError("Unexpected parameters for Flux2 TAE module")
|
||||
super().__init__(latent_channels=32, use_gn=True)
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
B, C, H, W = x.shape
|
||||
x = (
|
||||
x
|
||||
.reshape(B, 32, 2, 2, H, W)
|
||||
.permute(0, 1, 4, 2, 5, 3)
|
||||
.reshape(B, 32, H * 2, W * 2)
|
||||
)
|
||||
return super().forward(x)
|
||||
|
||||
class EncoderFlux2(Encoder):
|
||||
def __init__(self, latent_channels: int = 128, use_gn: bool = True):
|
||||
if latent_channels != 128 or not use_gn:
|
||||
raise ValueError("Unexpected parameters for Flux2 TAE module")
|
||||
super().__init__(latent_channels=32, use_gn=True)
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
result = super().forward(x)
|
||||
B, C, H, W = result.shape
|
||||
return (
|
||||
result
|
||||
.reshape(B, C, H // 2, 2, W // 2, 2)
|
||||
.permute(0, 1, 3, 5, 2, 4)
|
||||
.reshape(B, 128, H // 2, W // 2)
|
||||
)
|
||||
|
||||
def Decoder(latent_channels=4):
|
||||
return nn.Sequential(
|
||||
Clamp(), conv(latent_channels, 64), nn.ReLU(),
|
||||
Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False),
|
||||
Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False),
|
||||
Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False),
|
||||
Block(64, 64), conv(64, 3),
|
||||
)
|
||||
|
||||
class TAESD(nn.Module):
|
||||
latent_magnitude = 3
|
||||
@ -51,8 +98,15 @@ class TAESD(nn.Module):
|
||||
def __init__(self, encoder_path=None, decoder_path=None, latent_channels=4):
|
||||
"""Initialize pretrained TAESD on the given device from the given checkpoints."""
|
||||
super().__init__()
|
||||
self.taesd_encoder = Encoder(latent_channels=latent_channels)
|
||||
self.taesd_decoder = Decoder(latent_channels=latent_channels)
|
||||
if latent_channels == 128:
|
||||
encoder_class = EncoderFlux2
|
||||
decoder_class = DecoderFlux2
|
||||
else:
|
||||
encoder_class = Encoder
|
||||
decoder_class = Decoder
|
||||
self.taesd_encoder = encoder_class(latent_channels=latent_channels)
|
||||
self.taesd_decoder = decoder_class(latent_channels=latent_channels)
|
||||
|
||||
self.vae_scale = torch.nn.Parameter(torch.tensor(1.0))
|
||||
self.vae_shift = torch.nn.Parameter(torch.tensor(0.0))
|
||||
if encoder_path is not None:
|
||||
@ -61,19 +115,19 @@ class TAESD(nn.Module):
|
||||
self.taesd_decoder.load_state_dict(comfy.utils.load_torch_file(decoder_path, safe_load=True))
|
||||
|
||||
@staticmethod
|
||||
def scale_latents(x):
|
||||
def scale_latents(x: torch.Tensor) -> torch.Tensor:
|
||||
"""raw latents -> [0, 1]"""
|
||||
return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1)
|
||||
|
||||
@staticmethod
|
||||
def unscale_latents(x):
|
||||
def unscale_latents(x: torch.Tensor) -> torch.Tensor:
|
||||
"""[0, 1] -> raw latents"""
|
||||
return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude)
|
||||
|
||||
def decode(self, x):
|
||||
def decode(self, x: torch.Tensor) -> torch.Tensor:
|
||||
x_sample = self.taesd_decoder((x - self.vae_shift) * self.vae_scale)
|
||||
x_sample = x_sample.sub(0.5).mul(2)
|
||||
return x_sample
|
||||
|
||||
def encode(self, x):
|
||||
def encode(self, x: torch.Tensor) -> torch.Tensor:
|
||||
return (self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale) + self.vae_shift
|
||||
|
||||
@ -251,6 +251,7 @@ class VideoFromFile(VideoInput):
|
||||
container.seek(start_pts, stream=video_stream)
|
||||
|
||||
image_format = 'gbrpf32le'
|
||||
process_image_format = lambda a: a
|
||||
audio = None
|
||||
|
||||
streams = [video_stream]
|
||||
@ -283,11 +284,25 @@ class VideoFromFile(VideoInput):
|
||||
break
|
||||
|
||||
if not checked_alpha:
|
||||
alpha_channel = False
|
||||
for comp in frame.format.components:
|
||||
if comp.is_alpha or frame.format.name == "pal8":
|
||||
alphas = []
|
||||
image_format = 'gbrapf32le'
|
||||
alpha_channel = True
|
||||
break
|
||||
if frame.format.name in ("yuvj420p", "yuvj422p", "yuvj444p", "rgb24", "rgba", "pal8"):
|
||||
process_image_format = lambda a: a.float() / 255.0
|
||||
if alpha_channel:
|
||||
image_format = 'rgba'
|
||||
else:
|
||||
image_format = 'rgb24'
|
||||
else:
|
||||
process_image_format = lambda a: a
|
||||
if alpha_channel:
|
||||
image_format = 'gbrapf32le'
|
||||
else:
|
||||
image_format = 'gbrpf32le'
|
||||
|
||||
checked_alpha = True
|
||||
|
||||
img = frame.to_ndarray(format=image_format) # shape: (H, W, 4)
|
||||
@ -323,9 +338,9 @@ class VideoFromFile(VideoInput):
|
||||
else:
|
||||
audio_frames.append(frame.to_ndarray())
|
||||
|
||||
images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3)
|
||||
images = process_image_format(torch.stack(frames)) if len(frames) > 0 else torch.zeros(0, 0, 0, 3)
|
||||
if alphas is not None:
|
||||
alphas = torch.stack(alphas) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1)
|
||||
alphas = process_image_format(torch.stack(alphas)) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1)
|
||||
|
||||
# Get frame rate
|
||||
frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1)
|
||||
|
||||
@ -157,6 +157,11 @@ class SeedanceCreateAssetResponse(BaseModel):
|
||||
asset_id: str = Field(...)
|
||||
|
||||
|
||||
class SeedanceVirtualLibraryCreateAssetRequest(BaseModel):
|
||||
url: str = Field(..., description="Publicly accessible URL of the image asset to upload.")
|
||||
hash: str = Field(..., description="Dedup key. Re-submitting the same hash returns the existing asset id.")
|
||||
|
||||
|
||||
# Dollars per 1K tokens, keyed by (model_id, has_video_input).
|
||||
SEEDANCE2_PRICE_PER_1K_TOKENS = {
|
||||
("dreamina-seedance-2-0-260128", False): 0.007,
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
@ -20,6 +21,7 @@ from comfy_api_nodes.apis.bytedance import (
|
||||
SeedanceCreateAssetResponse,
|
||||
SeedanceCreateVisualValidateSessionResponse,
|
||||
SeedanceGetVisualValidateSessionResponse,
|
||||
SeedanceVirtualLibraryCreateAssetRequest,
|
||||
Seedream4Options,
|
||||
Seedream4TaskCreationRequest,
|
||||
TaskAudioContent,
|
||||
@ -271,6 +273,30 @@ async def _wait_for_asset_active(cls: type[IO.ComfyNode], asset_id: str, group_i
|
||||
)
|
||||
|
||||
|
||||
async def _seedance_virtual_library_upload_image_asset(
|
||||
cls: type[IO.ComfyNode],
|
||||
image: torch.Tensor,
|
||||
*,
|
||||
wait_label: str = "Uploading image",
|
||||
) -> str:
|
||||
"""Upload an image into the caller's per-customer Seedance virtual library."""
|
||||
public_url = await upload_image_to_comfyapi(cls, image, wait_label=wait_label)
|
||||
normalized = image.detach().cpu().contiguous().to(torch.float32)
|
||||
digest = hashlib.sha256()
|
||||
digest.update(str(tuple(normalized.shape)).encode("utf-8"))
|
||||
digest.update(b"\0")
|
||||
digest.update(normalized.numpy().tobytes())
|
||||
image_hash = digest.hexdigest()
|
||||
create_resp = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/seedance/virtual-library/assets", method="POST"),
|
||||
response_model=SeedanceCreateAssetResponse,
|
||||
data=SeedanceVirtualLibraryCreateAssetRequest(url=public_url, hash=image_hash),
|
||||
)
|
||||
await _wait_for_asset_active(cls, create_resp.asset_id, group_id="virtual-library")
|
||||
return f"asset://{create_resp.asset_id}"
|
||||
|
||||
|
||||
def _seedance2_price_extractor(model_id: str, has_video_input: bool):
|
||||
"""Returns a price_extractor closure for Seedance 2.0 poll_op."""
|
||||
rate = SEEDANCE2_PRICE_PER_1K_TOKENS.get((model_id, has_video_input))
|
||||
@ -1507,7 +1533,9 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
|
||||
if first_frame_asset_id:
|
||||
first_frame_url = image_assets[first_frame_asset_id]
|
||||
else:
|
||||
first_frame_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame.")
|
||||
first_frame_url = await _seedance_virtual_library_upload_image_asset(
|
||||
cls, first_frame, wait_label="Uploading first frame."
|
||||
)
|
||||
|
||||
content: list[TaskTextContent | TaskImageContent] = [
|
||||
TaskTextContent(text=model["prompt"]),
|
||||
@ -1527,7 +1555,9 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
|
||||
content.append(
|
||||
TaskImageContent(
|
||||
image_url=TaskImageContentUrl(
|
||||
url=await upload_image_to_comfyapi(cls, last_frame, wait_label="Uploading last frame.")
|
||||
url=await _seedance_virtual_library_upload_image_asset(
|
||||
cls, last_frame, wait_label="Uploading last frame."
|
||||
)
|
||||
),
|
||||
role="last_frame",
|
||||
),
|
||||
@ -1805,9 +1835,9 @@ class ByteDance2ReferenceNode(IO.ComfyNode):
|
||||
content.append(
|
||||
TaskImageContent(
|
||||
image_url=TaskImageContentUrl(
|
||||
url=await upload_image_to_comfyapi(
|
||||
url=await _seedance_virtual_library_upload_image_asset(
|
||||
cls,
|
||||
image=reference_images[key],
|
||||
reference_images[key],
|
||||
wait_label=f"Uploading image {i}",
|
||||
),
|
||||
),
|
||||
|
||||
@ -415,8 +415,9 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
||||
"1152x2048",
|
||||
"3840x2160",
|
||||
"2160x3840",
|
||||
"Custom",
|
||||
],
|
||||
tooltip="Image size",
|
||||
tooltip="Image size. Select 'Custom' to use the custom width and height (GPT Image 2 only).",
|
||||
optional=True,
|
||||
),
|
||||
IO.Int.Input(
|
||||
@ -445,6 +446,24 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
||||
default="gpt-image-2",
|
||||
optional=True,
|
||||
),
|
||||
IO.Int.Input(
|
||||
"custom_width",
|
||||
default=1024,
|
||||
min=1024,
|
||||
max=3840,
|
||||
step=16,
|
||||
tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).",
|
||||
optional=True,
|
||||
),
|
||||
IO.Int.Input(
|
||||
"custom_height",
|
||||
default=1024,
|
||||
min=1024,
|
||||
max=3840,
|
||||
step=16,
|
||||
tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).",
|
||||
optional=True,
|
||||
),
|
||||
],
|
||||
outputs=[
|
||||
IO.Image.Output(),
|
||||
@ -471,9 +490,9 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
||||
"high": [0.133, 0.22]
|
||||
},
|
||||
"gpt-image-2": {
|
||||
"low": [0.0048, 0.012],
|
||||
"medium": [0.041, 0.112],
|
||||
"high": [0.165, 0.43]
|
||||
"low": [0.0048, 0.019],
|
||||
"medium": [0.041, 0.168],
|
||||
"high": [0.165, 0.67]
|
||||
}
|
||||
};
|
||||
$range := $lookup($lookup($ranges, widgets.model), widgets.quality);
|
||||
@ -503,6 +522,8 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
||||
mask: Input.Image | None = None,
|
||||
n: int = 1,
|
||||
size: str = "1024x1024",
|
||||
custom_width: int = 1024,
|
||||
custom_height: int = 1024,
|
||||
model: str = "gpt-image-1",
|
||||
) -> IO.NodeOutput:
|
||||
validate_string(prompt, strip_whitespace=False)
|
||||
@ -510,7 +531,25 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
||||
if mask is not None and image is None:
|
||||
raise ValueError("Cannot use a mask without an input image")
|
||||
|
||||
if model in ("gpt-image-1", "gpt-image-1.5"):
|
||||
if size == "Custom":
|
||||
if model != "gpt-image-2":
|
||||
raise ValueError("Custom resolution is only supported by GPT Image 2 model")
|
||||
if custom_width % 16 != 0 or custom_height % 16 != 0:
|
||||
raise ValueError(f"Custom width and height must be multiples of 16, got {custom_width}x{custom_height}")
|
||||
if max(custom_width, custom_height) > 3840:
|
||||
raise ValueError(f"Custom resolution max edge must be <= 3840, got {custom_width}x{custom_height}")
|
||||
ratio = max(custom_width, custom_height) / min(custom_width, custom_height)
|
||||
if ratio > 3:
|
||||
raise ValueError(
|
||||
f"Custom resolution aspect ratio must not exceed 3:1, got {custom_width}x{custom_height}"
|
||||
)
|
||||
total_pixels = custom_width * custom_height
|
||||
if not 655_360 <= total_pixels <= 8_294_400:
|
||||
raise ValueError(
|
||||
f"Custom resolution total pixels must be between 655,360 and 8,294,400, got {total_pixels}"
|
||||
)
|
||||
size = f"{custom_width}x{custom_height}"
|
||||
elif model in ("gpt-image-1", "gpt-image-1.5"):
|
||||
if size not in ("auto", "1024x1024", "1024x1536", "1536x1024"):
|
||||
raise ValueError(f"Resolution {size} is only supported by GPT Image 2 model")
|
||||
|
||||
|
||||
53
nodes.py
53
nodes.py
@ -728,50 +728,26 @@ class LoraLoaderModelOnly(LoraLoader):
|
||||
|
||||
class VAELoader:
|
||||
video_taes = ["taehv", "lighttaew2_2", "lighttaew2_1", "lighttaehy1_5", "taeltx_2"]
|
||||
image_taes = ["taesd", "taesdxl", "taesd3", "taef1"]
|
||||
image_taes = ["taesd", "taesdxl", "taesd3", "taef1", "taef2"]
|
||||
|
||||
@staticmethod
|
||||
def vae_list(s):
|
||||
vaes = folder_paths.get_filename_list("vae")
|
||||
approx_vaes = folder_paths.get_filename_list("vae_approx")
|
||||
sdxl_taesd_enc = False
|
||||
sdxl_taesd_dec = False
|
||||
sd1_taesd_enc = False
|
||||
sd1_taesd_dec = False
|
||||
sd3_taesd_enc = False
|
||||
sd3_taesd_dec = False
|
||||
f1_taesd_enc = False
|
||||
f1_taesd_dec = False
|
||||
|
||||
have_img_encoder, have_img_decoder = set(), set()
|
||||
for v in approx_vaes:
|
||||
if v.startswith("taesd_decoder."):
|
||||
sd1_taesd_dec = True
|
||||
elif v.startswith("taesd_encoder."):
|
||||
sd1_taesd_enc = True
|
||||
elif v.startswith("taesdxl_decoder."):
|
||||
sdxl_taesd_dec = True
|
||||
elif v.startswith("taesdxl_encoder."):
|
||||
sdxl_taesd_enc = True
|
||||
elif v.startswith("taesd3_decoder."):
|
||||
sd3_taesd_dec = True
|
||||
elif v.startswith("taesd3_encoder."):
|
||||
sd3_taesd_enc = True
|
||||
elif v.startswith("taef1_encoder."):
|
||||
f1_taesd_dec = True
|
||||
elif v.startswith("taef1_decoder."):
|
||||
f1_taesd_enc = True
|
||||
else:
|
||||
parts = v.split("_", 1)
|
||||
if len(parts) != 2 or parts[0] not in s.image_taes:
|
||||
for tae in s.video_taes:
|
||||
if v.startswith(tae):
|
||||
vaes.append(v)
|
||||
|
||||
if sd1_taesd_dec and sd1_taesd_enc:
|
||||
vaes.append("taesd")
|
||||
if sdxl_taesd_dec and sdxl_taesd_enc:
|
||||
vaes.append("taesdxl")
|
||||
if sd3_taesd_dec and sd3_taesd_enc:
|
||||
vaes.append("taesd3")
|
||||
if f1_taesd_dec and f1_taesd_enc:
|
||||
vaes.append("taef1")
|
||||
break
|
||||
continue
|
||||
if parts[1].startswith("encoder."):
|
||||
have_img_encoder.add(parts[0])
|
||||
elif parts[1].startswith("decoder."):
|
||||
have_img_decoder.add(parts[0])
|
||||
vaes += [k for k in have_img_decoder if k in have_img_encoder]
|
||||
vaes.append("pixel_space")
|
||||
return vaes
|
||||
|
||||
@ -827,6 +803,11 @@ class VAELoader:
|
||||
else:
|
||||
vae_path = folder_paths.get_full_path_or_raise("vae", vae_name)
|
||||
sd, metadata = comfy.utils.load_torch_file(vae_path, return_metadata=True)
|
||||
if vae_name == "taef2":
|
||||
if metadata is None:
|
||||
metadata = {"tae_latent_channels": 128}
|
||||
else:
|
||||
metadata["tae_latent_channels"] = 128
|
||||
vae = comfy.sd.VAE(sd=sd, metadata=metadata)
|
||||
vae.throw_exception_if_invalid()
|
||||
return (vae,)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
comfyui-frontend-package==1.42.15
|
||||
comfyui-workflow-templates==0.9.63
|
||||
comfyui-workflow-templates==0.9.65
|
||||
comfyui-embedded-docs==0.4.4
|
||||
torch
|
||||
torchsde
|
||||
@ -19,7 +19,7 @@ scipy
|
||||
tqdm
|
||||
psutil
|
||||
alembic
|
||||
SQLAlchemy>=2.0
|
||||
SQLAlchemy>=2.0.0
|
||||
filelock
|
||||
av>=14.2.0
|
||||
comfy-kitchen>=0.2.8
|
||||
|
||||
Loading…
Reference in New Issue
Block a user