Compare commits

..

4 Commits

Author SHA1 Message Date
Alexander Piskun
c7896dd75d
Merge 28d538ddf9 into 0a7d2ffd68 2026-05-12 12:20:26 +09:00
comfyanonymous
0a7d2ffd68
Support anima TE lora kohya format. (#13847) 2026-05-11 20:01:52 -07:00
rattus
20e439419c
model_patcher: Fix safetensors saving of fp8 (#13835)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
This was missing proper weight scale casting in the saving path.
2026-05-11 12:48:10 -07:00
Alexander Piskun
428c323780
[Partner Nodes] new OpenAI Image node with DynamicCombo and Autogrow (#13838)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
2026-05-11 06:19:35 -07:00
3 changed files with 373 additions and 3 deletions

View File

@ -97,12 +97,14 @@ def load_lora(lora, to_load, log_missing=True):
def model_lora_keys_clip(model, key_map={}):
sdk = model.state_dict().keys()
prefix_set = set()
for k in sdk:
if k.endswith(".weight"):
key_map["text_encoders.{}".format(k[:-len(".weight")])] = k #generic lora format without any weird key names
tp = k.find(".transformer.") #also map without wrapper prefix for composite text encoder models
if tp > 0 and not k.startswith("clip_"):
key_map["text_encoders.{}".format(k[tp + 1:-len(".weight")])] = k
prefix_set.add(k.split('.')[0])
text_model_lora_key = "lora_te_text_model_encoder_layers_{}_{}"
clip_l_present = False
@ -163,6 +165,13 @@ def model_lora_keys_clip(model, key_map={}):
lora_key = "lora_te1_{}".format(l_key.replace(".", "_"))
key_map[lora_key] = k
if len(prefix_set) == 1:
full_prefix = "{}.transformer.model.".format(next(iter(prefix_set))) # kohya anima and maybe other single TE models that use a single llama arch based te
for k in sdk:
if k.endswith(".weight"):
if k.startswith(full_prefix):
l_key = k[len(full_prefix):-len(".weight")]
key_map["lora_te_{}".format(l_key.replace(".", "_"))] = k
k = "clip_g.transformer.text_projection.weight"
if k in sdk:

View File

@ -242,6 +242,37 @@ class LazyCastingParam(torch.nn.Parameter):
return self.model.patch_weight_to_device(self.key, device_to=self.model.load_device, return_weight=True).to("cpu")
class LazyCastingQuantizedParam:
def __init__(self, model, key):
self.model = model
self.key = key
self.cpu_state_dict = None
def state_dict_tensor(self, state_dict_key):
if self.cpu_state_dict is None:
weight = self.model.patch_weight_to_device(self.key, device_to=self.model.load_device, return_weight=True)
self.cpu_state_dict = {k: v.to("cpu") for k, v in weight.state_dict(self.key).items()}
return self.cpu_state_dict[state_dict_key]
class LazyCastingParamPiece(torch.nn.Parameter):
def __new__(cls, caster, state_dict_key, tensor):
return super().__new__(cls, tensor)
def __init__(self, caster, state_dict_key, tensor):
self.caster = caster
self.state_dict_key = state_dict_key
@property
def device(self):
return CustomTorchDevice
def to(self, *args, **kwargs):
caster = self.caster
del self.caster
return caster.state_dict_tensor(self.state_dict_key)
class ModelPatcher:
def __init__(self, model, load_device, offload_device, size=0, weight_inplace_update=False):
self.size = size
@ -1463,20 +1494,37 @@ class ModelPatcher:
self.clear_cached_hook_weights()
def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
unet_state_dict = self.model.diffusion_model.state_dict()
for k, v in unet_state_dict.items():
original_state_dict = self.model.diffusion_model.state_dict()
unet_state_dict = {}
keys = list(original_state_dict)
while len(keys) > 0:
k = keys.pop(0)
v = original_state_dict[k]
op_keys = k.rsplit('.', 1)
if (len(op_keys) < 2) or op_keys[1] not in ["weight", "bias"]:
unet_state_dict[k] = v
continue
try:
op = comfy.utils.get_attr(self.model.diffusion_model, op_keys[0])
except:
unet_state_dict[k] = v
continue
if not op or not hasattr(op, "comfy_cast_weights") or \
(hasattr(op, "comfy_patched_weights") and op.comfy_patched_weights == True):
unet_state_dict[k] = v
continue
key = "diffusion_model." + k
unet_state_dict[k] = LazyCastingParam(self, key, comfy.utils.get_attr(self.model, key))
weight = comfy.utils.get_attr(self.model, key)
if isinstance(weight, QuantizedTensor) and k in original_state_dict:
qt_state_dict = weight.state_dict(k)
caster = LazyCastingQuantizedParam(self, key)
for group_key in (x for x in qt_state_dict if x in original_state_dict):
if group_key in keys:
keys.remove(group_key)
unet_state_dict.pop(group_key, "")
unet_state_dict[group_key] = LazyCastingParamPiece(caster, "diffusion_model." + group_key, original_state_dict[group_key])
continue
unet_state_dict[k] = LazyCastingParam(self, key, weight)
return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
def __del__(self):

View File

@ -27,6 +27,7 @@ from comfy_api_nodes.util import (
ApiEndpoint,
download_url_to_bytesio,
downscale_image_tensor,
get_number_of_images,
poll_op,
sync_op,
tensor_to_base64_string,
@ -372,6 +373,7 @@ class OpenAIGPTImage1(IO.ComfyNode):
display_name="OpenAI GPT Image 2",
category="api node/image/OpenAI",
description="Generates images synchronously via OpenAI's GPT Image endpoint.",
is_deprecated=True,
inputs=[
IO.String.Input(
"prompt",
@ -640,6 +642,316 @@ class OpenAIGPTImage1(IO.ComfyNode):
return IO.NodeOutput(await validate_and_cast_response(response))
def _gpt_image_shared_inputs():
"""Inputs shared by all GPT Image models (quality + reference images + mask)."""
return [
IO.Combo.Input(
"quality",
default="low",
options=["low", "medium", "high"],
tooltip="Image quality, affects cost and generation time.",
),
IO.Autogrow.Input(
"images",
template=IO.Autogrow.TemplateNames(
IO.Image.Input("image"),
names=[f"image_{i}" for i in range(1, 17)],
min=0,
),
tooltip="Optional reference image(s) for image editing. Up to 16 images.",
),
IO.Mask.Input(
"mask",
optional=True,
tooltip="Optional mask for inpainting (white areas will be replaced). "
"Requires exactly one reference image.",
),
]
def _gpt_image_legacy_model_inputs():
"""Per-model widget set for legacy gpt-image-1 / gpt-image-1.5 (4 base sizes, transparent bg allowed)."""
return [
IO.Combo.Input(
"size",
default="auto",
options=["auto", "1024x1024", "1024x1536", "1536x1024"],
tooltip="Image size.",
),
IO.Combo.Input(
"background",
default="auto",
options=["auto", "opaque", "transparent"],
tooltip="Return image with or without background.",
),
*_gpt_image_shared_inputs(),
]
class OpenAIGPTImageNodeV2(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="OpenAIGPTImageNodeV2",
display_name="OpenAI GPT Image 2",
category="api node/image/OpenAI",
description="Generates images via OpenAI's GPT Image endpoint.",
inputs=[
IO.String.Input(
"prompt",
default="",
multiline=True,
tooltip="Text prompt for GPT Image",
),
IO.DynamicCombo.Input(
"model",
options=[
IO.DynamicCombo.Option(
"gpt-image-2",
[
IO.Combo.Input(
"size",
default="auto",
options=[
"auto",
"1024x1024",
"1024x1536",
"1536x1024",
"2048x2048",
"2048x1152",
"1152x2048",
"3840x2160",
"2160x3840",
"Custom",
],
tooltip="Image size. Select 'Custom' to use the custom width and height.",
),
IO.Int.Input(
"custom_width",
default=1024,
min=1024,
max=3840,
step=16,
tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16.",
),
IO.Int.Input(
"custom_height",
default=1024,
min=1024,
max=3840,
step=16,
tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16.",
),
IO.Combo.Input(
"background",
default="auto",
options=["auto", "opaque"],
tooltip="Return image with or without background.",
),
*_gpt_image_shared_inputs(),
],
),
IO.DynamicCombo.Option("gpt-image-1.5", _gpt_image_legacy_model_inputs()),
IO.DynamicCombo.Option("gpt-image-1", _gpt_image_legacy_model_inputs()),
],
),
IO.Int.Input(
"n",
default=1,
min=1,
max=8,
step=1,
tooltip="How many images to generate",
display_mode=IO.NumberDisplay.number,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="not implemented yet in backend",
),
],
outputs=[IO.Image.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model", "model.quality", "n"]),
expr="""
(
$ranges := {
"gpt-image-1": {
"low": [0.011, 0.02],
"medium": [0.042, 0.07],
"high": [0.167, 0.25]
},
"gpt-image-1.5": {
"low": [0.009, 0.02],
"medium": [0.034, 0.062],
"high": [0.133, 0.22]
},
"gpt-image-2": {
"low": [0.0048, 0.019],
"medium": [0.041, 0.168],
"high": [0.165, 0.67]
}
};
$range := $lookup($lookup($ranges, widgets.model), $lookup(widgets, "model.quality"));
$nRaw := widgets.n;
$n := ($nRaw != null and $nRaw != 0) ? $nRaw : 1;
($n = 1)
? {"type":"range_usd","min_usd": $range[0], "max_usd": $range[1], "format": {"approximate": true}}
: {
"type":"range_usd",
"min_usd": $range[0] * $n,
"max_usd": $range[1] * $n,
"format": { "suffix": "/Run", "approximate": true }
}
)
""",
),
)
@classmethod
async def execute(
cls,
prompt: str,
model: dict,
n: int,
seed: int,
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
model_id = model["model"]
size = model["size"]
background = model["background"]
quality = model["quality"]
custom_width = model.get("custom_width", 1024)
custom_height = model.get("custom_height", 1024)
images_dict = model.get("images") or {}
image_tensors: list[Input.Image] = [t for t in images_dict.values() if t is not None]
n_images = sum(get_number_of_images(t) for t in image_tensors)
mask = model.get("mask")
if mask is not None and n_images == 0:
raise ValueError("Cannot use a mask without an input image")
if size == "Custom":
if custom_width % 16 != 0 or custom_height % 16 != 0:
raise ValueError(
f"Custom width and height must be multiples of 16, got {custom_width}x{custom_height}"
)
if max(custom_width, custom_height) > 3840:
raise ValueError(
f"Custom resolution max edge must be <= 3840, got {custom_width}x{custom_height}"
)
ratio = max(custom_width, custom_height) / min(custom_width, custom_height)
if ratio > 3:
raise ValueError(
f"Custom resolution aspect ratio must not exceed 3:1, got {custom_width}x{custom_height}"
)
total_pixels = custom_width * custom_height
if not 655_360 <= total_pixels <= 8_294_400:
raise ValueError(
f"Custom resolution total pixels must be between 655,360 and 8,294,400, got {total_pixels}"
)
size = f"{custom_width}x{custom_height}"
if model_id == "gpt-image-1":
price_extractor = calculate_tokens_price_image_1
elif model_id == "gpt-image-1.5":
price_extractor = calculate_tokens_price_image_1_5
elif model_id == "gpt-image-2":
price_extractor = calculate_tokens_price_image_2_0
else:
raise ValueError(f"Unknown model: {model_id}")
if image_tensors:
flat: list[torch.Tensor] = []
for tensor in image_tensors:
if len(tensor.shape) == 4:
flat.extend(tensor[i : i + 1] for i in range(tensor.shape[0]))
else:
flat.append(tensor.unsqueeze(0))
files = []
for i, single_image in enumerate(flat):
scaled_image = downscale_image_tensor(single_image, total_pixels=2048 * 2048).squeeze()
image_np = (scaled_image.numpy() * 255).astype(np.uint8)
img = Image.fromarray(image_np)
img_byte_arr = BytesIO()
img.save(img_byte_arr, format="PNG")
img_byte_arr.seek(0)
if len(flat) == 1:
files.append(("image", (f"image_{i}.png", img_byte_arr, "image/png")))
else:
files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png")))
if mask is not None:
if len(flat) != 1:
raise Exception("Cannot use a mask with multiple image")
ref_image = flat[0]
if mask.shape[1:] != ref_image.shape[1:-1]:
raise Exception("Mask and Image must be the same size")
_, height, width = mask.shape
rgba_mask = torch.zeros(height, width, 4, device="cpu")
rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
scaled_mask = downscale_image_tensor(
rgba_mask.unsqueeze(0), total_pixels=2048 * 2048
).squeeze()
mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
mask_img = Image.fromarray(mask_np)
mask_img_byte_arr = BytesIO()
mask_img.save(mask_img_byte_arr, format="PNG")
mask_img_byte_arr.seek(0)
files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png")))
response = await sync_op(
cls,
ApiEndpoint(path="/proxy/openai/images/edits", method="POST"),
response_model=OpenAIImageGenerationResponse,
data=OpenAIImageEditRequest(
model=model_id,
prompt=prompt,
quality=quality,
background=background,
n=n,
size=size,
moderation="low",
),
content_type="multipart/form-data",
files=files,
price_extractor=price_extractor,
)
else:
response = await sync_op(
cls,
ApiEndpoint(path="/proxy/openai/images/generations", method="POST"),
response_model=OpenAIImageGenerationResponse,
data=OpenAIImageGenerationRequest(
model=model_id,
prompt=prompt,
quality=quality,
background=background,
n=n,
size=size,
moderation="low",
),
price_extractor=price_extractor,
)
return IO.NodeOutput(await validate_and_cast_response(response))
class OpenAIChatNode(IO.ComfyNode):
"""
Node to generate text responses from an OpenAI model.
@ -999,6 +1311,7 @@ class OpenAIExtension(ComfyExtension):
OpenAIDalle2,
OpenAIDalle3,
OpenAIGPTImage1,
OpenAIGPTImageNodeV2,
OpenAIChatNode,
OpenAIInputFiles,
OpenAIChatConfig,