Compare commits

...

21 Commits

Author SHA1 Message Date
Godwin Iheuwa
7da47f16fa
Merge 74b8d176ac into 8ccc0c94fa 2026-01-20 09:01:55 +03:00
comfyanonymous
8ccc0c94fa
Make omni stuff work on regular z image for easier testing. (#11985)
Some checks failed
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Build package / Build Test (3.10) (push) Has been cancelled
Build package / Build Test (3.11) (push) Has been cancelled
Build package / Build Test (3.12) (push) Has been cancelled
Build package / Build Test (3.13) (push) Has been cancelled
Build package / Build Test (3.14) (push) Has been cancelled
2026-01-20 00:32:00 -05:00
Comfy Org PR Bot
4edb87aa50
Bump comfyui-frontend-package to 1.37.11 (#11976) 2026-01-19 23:57:50 -05:00
ComfyUI Wiki
0fc3b6e3a6
chore: update workflow templates to v0.8.15 (#11984) 2026-01-19 23:17:56 -05:00
comfyanonymous
2108167f9f
Support zimage omni base model. (#11979) 2026-01-19 23:17:38 -05:00
comfyanonymous
9d273d3ab1 ComfyUI v0.10.0 2026-01-19 22:40:18 -05:00
comfyanonymous
70c91b8248
Fix #11963 (#11982) 2026-01-19 22:32:40 -05:00
rkfg
0da5a0fe58
Convert mono audio to fake stereo for LTXV VAE encoding (#11965)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Build package / Build Test (3.10) (push) Waiting to run
Build package / Build Test (3.11) (push) Waiting to run
Build package / Build Test (3.12) (push) Waiting to run
Build package / Build Test (3.13) (push) Waiting to run
Build package / Build Test (3.14) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
2026-01-19 22:12:02 -05:00
comfyanonymous
e0eacb0688
Simpler way to implement the #11980 loras. (#11981) 2026-01-19 22:00:36 -05:00
Jedrzej Kosinski
7458e20465
Make Autogrow validation work properly (#11977)
* In-progress autogrow validation fixes - properly looks at required/optional inputs, now working on the edge case that all inputs are optional and nothing is plugged in (should just be an empty dictionary passed into node)

* Allow autogrow to work with all inputs being optional

* Revert accidentally pushed changes to nodes_logic.py
2026-01-19 16:58:30 -08:00
Jedrzej Kosinski
b931b37e30
feat(api-nodes): add Bria Edit node (#11978)
Co-authored-by: Alexander Piskun <bigcat88@icloud.com>
2026-01-19 16:47:14 -08:00
ComfyUI Wiki
866a4619db
chore: update workflow templates to v0.8.14 (#11974) 2026-01-19 14:21:35 -08:00
Godwin Iheuwa
74b8d176ac
Merge branch 'master' into fix/logger-flush-oserror 2026-01-17 20:39:44 +05:30
Godwin Iheuwa
0df9e96683
Merge branch 'master' into fix/logger-flush-oserror 2026-01-07 14:49:18 +05:30
RUiNtheExtinct
3cfe58d0c3 Merge origin/master into fix/logger-flush-oserror 2025-12-29 23:08:44 +05:30
RUiNtheExtinct
8c0f498a23 fix(logger): copy logs before passing to callbacks to prevent mutation
If a callback modifies the logs list (e.g., clear()) and a subsequent
callback raises an exception, the preserved logs for retry would have
been corrupted. Now passes a shallow copy to callbacks.
2025-12-28 14:45:13 +05:30
RUiNtheExtinct
1a410446e3 fix(logger): add max size limit for pending logs to prevent OOM
If flush callbacks persistently fail (e.g., network issues), logs would
accumulate indefinitely in _logs_since_flush, potentially causing OOM
on long-running servers.

Added MAX_PENDING_LOGS (10000) limit - when exceeded, oldest logs are
dropped. This is similar to how the global logs deque uses maxlen.
2025-12-28 14:35:31 +05:30
RUiNtheExtinct
494dce9a36 fix(logger): preserve logs for retry when callback raises exception
Move log clearing to after all callbacks succeed, not before. This
ensures that if any callback raises an exception, the logs remain
available for retry on the next flush call instead of being lost.

The previous approach cleared logs before iterating callbacks,
which meant logs were permanently lost if any callback failed.
2025-12-28 13:49:09 +05:30
RUiNtheExtinct
ddad64a4bf fix(logger): prevent duplicate logs when callback raises exception
Clear _logs_since_flush before iterating callbacks by capturing logs
into a local variable first. This prevents duplicate logs if a
callback raises an exception, since the instance variable is already
cleared before any callback runs.

Add test to verify logs are cleared even when callbacks raise.
2025-12-28 13:36:59 +05:30
RUiNtheExtinct
c06b18a014 fix(logger): clear logs after all callbacks, not inside loop
Move _logs_since_flush reset outside the callback loop so all
registered callbacks receive the same log data instead of only
the first callback getting logs while subsequent ones get an empty list.

Add test to verify multiple callbacks all receive the same logs.
2025-12-28 13:31:45 +05:30
RUiNtheExtinct
e86ffb0ea6 fix(logger): handle OSError errno 22 on flush for Windows piped streams
When running ComfyUI in API mode on Windows, print() statements from
custom nodes can crash with "OSError: [Errno 22] Invalid argument"
during flush. This occurs because piped/redirected stdout streams on
Windows may fail to flush even after successful writes.

This fix catches OSError with errno 22 (EINVAL) specifically in
LogInterceptor.flush(), allowing the flush callbacks to still execute.
The error is safe to ignore since write() already succeeded.

Fixes #11367
2025-12-28 13:29:27 +05:30
19 changed files with 1050 additions and 77 deletions

View File

@ -11,6 +11,10 @@ stderr_interceptor = None
class LogInterceptor(io.TextIOWrapper):
# Maximum logs to buffer between flushes to prevent unbounded memory growth
# if callbacks persistently fail. 10000 entries is ~2-5MB depending on message size.
MAX_PENDING_LOGS = 10000
def __init__(self, stream, *args, **kwargs):
buffer = stream.buffer
encoding = stream.encoding
@ -23,6 +27,9 @@ class LogInterceptor(io.TextIOWrapper):
entry = {"t": datetime.now().isoformat(), "m": data}
with self._lock:
self._logs_since_flush.append(entry)
# Enforce max size to prevent OOM if callbacks persistently fail
if len(self._logs_since_flush) > self.MAX_PENDING_LOGS:
self._logs_since_flush = self._logs_since_flush[-self.MAX_PENDING_LOGS:]
# Simple handling for cr to overwrite the last output if it isnt a full line
# else logs just get full of progress messages
@ -32,10 +39,21 @@ class LogInterceptor(io.TextIOWrapper):
super().write(data)
def flush(self):
super().flush()
try:
super().flush()
except OSError as e:
# errno 22 (EINVAL) can occur on Windows with piped/redirected streams
# This is safe to ignore as write() already succeeded
if e.errno != 22:
raise
if not self._logs_since_flush:
return
# Copy to prevent callback mutations from affecting retry on failure
logs_to_send = list(self._logs_since_flush)
for cb in self._flush_callbacks:
cb(self._logs_since_flush)
self._logs_since_flush = []
cb(logs_to_send)
# Only clear after all callbacks succeed - if any raises, logs remain for retry
self._logs_since_flush = []
def on_flush(self, callback):
self._flush_callbacks.append(callback)

View File

@ -189,9 +189,12 @@ class AudioVAE(torch.nn.Module):
waveform = self.device_manager.move_to_load_device(waveform)
expected_channels = self.autoencoder.encoder.in_channels
if waveform.shape[1] != expected_channels:
raise ValueError(
f"Input audio must have {expected_channels} channels, got {waveform.shape[1]}"
)
if waveform.shape[1] == 1:
waveform = waveform.expand(-1, expected_channels, *waveform.shape[2:])
else:
raise ValueError(
f"Input audio must have {expected_channels} channels, got {waveform.shape[1]}"
)
mel_spec = self.preprocessor.waveform_to_mel(
waveform, waveform_sample_rate, device=self.device_manager.load_device

View File

@ -13,10 +13,53 @@ from comfy.ldm.modules.attention import optimized_attention_masked
from comfy.ldm.flux.layers import EmbedND
from comfy.ldm.flux.math import apply_rope
import comfy.patcher_extension
import comfy.utils
def modulate(x, scale):
return x * (1 + scale.unsqueeze(1))
def invert_slices(slices, length):
sorted_slices = sorted(slices)
result = []
current = 0
for start, end in sorted_slices:
if current < start:
result.append((current, start))
current = max(current, end)
if current < length:
result.append((current, length))
return result
def modulate(x, scale, timestep_zero_index=None):
if timestep_zero_index is None:
return x * (1 + scale.unsqueeze(1))
else:
scale = (1 + scale.unsqueeze(1))
actual_batch = scale.size(0) // 2
slices = timestep_zero_index
invert = invert_slices(timestep_zero_index, x.shape[1])
for s in slices:
x[:, s[0]:s[1]] *= scale[actual_batch:]
for s in invert:
x[:, s[0]:s[1]] *= scale[:actual_batch]
return x
def apply_gate(gate, x, timestep_zero_index=None):
if timestep_zero_index is None:
return gate * x
else:
actual_batch = gate.size(0) // 2
slices = timestep_zero_index
invert = invert_slices(timestep_zero_index, x.shape[1])
for s in slices:
x[:, s[0]:s[1]] *= gate[actual_batch:]
for s in invert:
x[:, s[0]:s[1]] *= gate[:actual_batch]
return x
#############################################################################
# Core NextDiT Model #
@ -258,6 +301,7 @@ class JointTransformerBlock(nn.Module):
x_mask: torch.Tensor,
freqs_cis: torch.Tensor,
adaln_input: Optional[torch.Tensor]=None,
timestep_zero_index=None,
transformer_options={},
):
"""
@ -276,18 +320,18 @@ class JointTransformerBlock(nn.Module):
assert adaln_input is not None
scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaLN_modulation(adaln_input).chunk(4, dim=1)
x = x + gate_msa.unsqueeze(1).tanh() * self.attention_norm2(
x = x + apply_gate(gate_msa.unsqueeze(1).tanh(), self.attention_norm2(
clamp_fp16(self.attention(
modulate(self.attention_norm1(x), scale_msa),
modulate(self.attention_norm1(x), scale_msa, timestep_zero_index=timestep_zero_index),
x_mask,
freqs_cis,
transformer_options=transformer_options,
))
))), timestep_zero_index=timestep_zero_index
)
x = x + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(
x = x + apply_gate(gate_mlp.unsqueeze(1).tanh(), self.ffn_norm2(
clamp_fp16(self.feed_forward(
modulate(self.ffn_norm1(x), scale_mlp),
))
modulate(self.ffn_norm1(x), scale_mlp, timestep_zero_index=timestep_zero_index),
))), timestep_zero_index=timestep_zero_index
)
else:
assert adaln_input is None
@ -345,13 +389,37 @@ class FinalLayer(nn.Module):
),
)
def forward(self, x, c):
def forward(self, x, c, timestep_zero_index=None):
scale = self.adaLN_modulation(c)
x = modulate(self.norm_final(x), scale)
x = modulate(self.norm_final(x), scale, timestep_zero_index=timestep_zero_index)
x = self.linear(x)
return x
def pad_zimage(feats, pad_token, pad_tokens_multiple):
pad_extra = (-feats.shape[1]) % pad_tokens_multiple
return torch.cat((feats, pad_token.to(device=feats.device, dtype=feats.dtype, copy=True).unsqueeze(0).repeat(feats.shape[0], pad_extra, 1)), dim=1), pad_extra
def pos_ids_x(start_t, H_tokens, W_tokens, batch_size, device, transformer_options={}):
rope_options = transformer_options.get("rope_options", None)
h_scale = 1.0
w_scale = 1.0
h_start = 0
w_start = 0
if rope_options is not None:
h_scale = rope_options.get("scale_y", 1.0)
w_scale = rope_options.get("scale_x", 1.0)
h_start = rope_options.get("shift_y", 0.0)
w_start = rope_options.get("shift_x", 0.0)
x_pos_ids = torch.zeros((batch_size, H_tokens * W_tokens, 3), dtype=torch.float32, device=device)
x_pos_ids[:, :, 0] = start_t
x_pos_ids[:, :, 1] = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
x_pos_ids[:, :, 2] = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
return x_pos_ids
class NextDiT(nn.Module):
"""
Diffusion model with a Transformer backbone.
@ -378,6 +446,7 @@ class NextDiT(nn.Module):
time_scale=1.0,
pad_tokens_multiple=None,
clip_text_dim=None,
siglip_feat_dim=None,
image_model=None,
device=None,
dtype=None,
@ -491,6 +560,41 @@ class NextDiT(nn.Module):
for layer_id in range(n_layers)
]
)
if siglip_feat_dim is not None:
self.siglip_embedder = nn.Sequential(
operation_settings.get("operations").RMSNorm(siglip_feat_dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
operation_settings.get("operations").Linear(
siglip_feat_dim,
dim,
bias=True,
device=operation_settings.get("device"),
dtype=operation_settings.get("dtype"),
),
)
self.siglip_refiner = nn.ModuleList(
[
JointTransformerBlock(
layer_id,
dim,
n_heads,
n_kv_heads,
multiple_of,
ffn_dim_multiplier,
norm_eps,
qk_norm,
modulation=False,
operation_settings=operation_settings,
)
for layer_id in range(n_refiner_layers)
]
)
self.siglip_pad_token = nn.Parameter(torch.empty((1, dim), device=device, dtype=dtype))
else:
self.siglip_embedder = None
self.siglip_refiner = None
self.siglip_pad_token = None
# This norm final is in the lumina 2.0 code but isn't actually used for anything.
# self.norm_final = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.final_layer = FinalLayer(dim, patch_size, self.out_channels, z_image_modulation=z_image_modulation, operation_settings=operation_settings)
@ -531,70 +635,168 @@ class NextDiT(nn.Module):
imgs = torch.stack(imgs, dim=0)
return imgs
def patchify_and_embed(
self, x: List[torch.Tensor] | torch.Tensor, cap_feats: torch.Tensor, cap_mask: torch.Tensor, t: torch.Tensor, num_tokens, transformer_options={}
) -> Tuple[torch.Tensor, torch.Tensor, List[Tuple[int, int]], List[int], torch.Tensor]:
bsz = len(x)
pH = pW = self.patch_size
device = x[0].device
orig_x = x
if self.pad_tokens_multiple is not None:
pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple
cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype, copy=True).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)
def embed_cap(self, cap_feats=None, offset=0, bsz=1, device=None, dtype=None):
if cap_feats is not None:
cap_feats = self.cap_embedder(cap_feats)
cap_feats_len = cap_feats.shape[1]
if self.pad_tokens_multiple is not None:
cap_feats, _ = pad_zimage(cap_feats, self.cap_pad_token, self.pad_tokens_multiple)
else:
cap_feats_len = 0
cap_feats = self.cap_pad_token.to(device=device, dtype=dtype, copy=True).unsqueeze(0).repeat(bsz, self.pad_tokens_multiple, 1)
cap_pos_ids = torch.zeros(bsz, cap_feats.shape[1], 3, dtype=torch.float32, device=device)
cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0
cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0 + offset
embeds = (cap_feats,)
freqs_cis = (self.rope_embedder(cap_pos_ids).movedim(1, 2),)
return embeds, freqs_cis, cap_feats_len
def embed_all(self, x, cap_feats=None, siglip_feats=None, offset=0, omni=False, transformer_options={}):
bsz = 1
pH = pW = self.patch_size
device = x.device
embeds, freqs_cis, cap_feats_len = self.embed_cap(cap_feats, offset=offset, bsz=bsz, device=device, dtype=x.dtype)
if (not omni) or self.siglip_embedder is None:
cap_feats_len = embeds[0].shape[1] + offset
embeds += (None,)
freqs_cis += (None,)
else:
cap_feats_len += offset
if siglip_feats is not None:
b, h, w, c = siglip_feats.shape
siglip_feats = siglip_feats.permute(0, 3, 1, 2).reshape(b, h * w, c)
siglip_feats = self.siglip_embedder(siglip_feats)
siglip_pos_ids = torch.zeros((bsz, siglip_feats.shape[1], 3), dtype=torch.float32, device=device)
siglip_pos_ids[:, :, 0] = cap_feats_len + 2
siglip_pos_ids[:, :, 1] = (torch.linspace(0, h * 8 - 1, steps=h, dtype=torch.float32, device=device).floor()).view(-1, 1).repeat(1, w).flatten()
siglip_pos_ids[:, :, 2] = (torch.linspace(0, w * 8 - 1, steps=w, dtype=torch.float32, device=device).floor()).view(1, -1).repeat(h, 1).flatten()
if self.siglip_pad_token is not None:
siglip_feats, pad_extra = pad_zimage(siglip_feats, self.siglip_pad_token, self.pad_tokens_multiple) # TODO: double check
siglip_pos_ids = torch.nn.functional.pad(siglip_pos_ids, (0, 0, 0, pad_extra))
else:
if self.siglip_pad_token is not None:
siglip_feats = self.siglip_pad_token.to(device=device, dtype=x.dtype, copy=True).unsqueeze(0).repeat(bsz, self.pad_tokens_multiple, 1)
siglip_pos_ids = torch.zeros((bsz, siglip_feats.shape[1], 3), dtype=torch.float32, device=device)
if siglip_feats is None:
embeds += (None,)
freqs_cis += (None,)
else:
embeds += (siglip_feats,)
freqs_cis += (self.rope_embedder(siglip_pos_ids).movedim(1, 2),)
B, C, H, W = x.shape
x = self.x_embedder(x.view(B, C, H // pH, pH, W // pW, pW).permute(0, 2, 4, 3, 5, 1).flatten(3).flatten(1, 2))
rope_options = transformer_options.get("rope_options", None)
h_scale = 1.0
w_scale = 1.0
h_start = 0
w_start = 0
if rope_options is not None:
h_scale = rope_options.get("scale_y", 1.0)
w_scale = rope_options.get("scale_x", 1.0)
h_start = rope_options.get("shift_y", 0.0)
w_start = rope_options.get("shift_x", 0.0)
H_tokens, W_tokens = H // pH, W // pW
x_pos_ids = torch.zeros((bsz, x.shape[1], 3), dtype=torch.float32, device=device)
x_pos_ids[:, :, 0] = cap_feats.shape[1] + 1
x_pos_ids[:, :, 1] = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
x_pos_ids[:, :, 2] = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
x_pos_ids = pos_ids_x(cap_feats_len + 1, H // pH, W // pW, bsz, device, transformer_options=transformer_options)
if self.pad_tokens_multiple is not None:
pad_extra = (-x.shape[1]) % self.pad_tokens_multiple
x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype, copy=True).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
x, pad_extra = pad_zimage(x, self.x_pad_token, self.pad_tokens_multiple)
x_pos_ids = torch.nn.functional.pad(x_pos_ids, (0, 0, 0, pad_extra))
freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)
embeds += (x,)
freqs_cis += (self.rope_embedder(x_pos_ids).movedim(1, 2),)
return embeds, freqs_cis, cap_feats_len + len(freqs_cis) - 1
def patchify_and_embed(
self, x: torch.Tensor, cap_feats: torch.Tensor, cap_mask: torch.Tensor, t: torch.Tensor, num_tokens, ref_latents=[], ref_contexts=[], siglip_feats=[], transformer_options={}
) -> Tuple[torch.Tensor, torch.Tensor, List[Tuple[int, int]], List[int], torch.Tensor]:
bsz = x.shape[0]
cap_mask = None # TODO?
main_siglip = None
orig_x = x
embeds = ([], [], [])
freqs_cis = ([], [], [])
leftover_cap = []
start_t = 0
omni = len(ref_latents) > 0
if omni:
for i, ref in enumerate(ref_latents):
if i < len(ref_contexts):
ref_con = ref_contexts[i]
else:
ref_con = None
if i < len(siglip_feats):
sig_feat = siglip_feats[i]
else:
sig_feat = None
out = self.embed_all(ref, ref_con, sig_feat, offset=start_t, omni=omni, transformer_options=transformer_options)
for i, e in enumerate(out[0]):
if e is not None:
embeds[i].append(comfy.utils.repeat_to_batch_size(e, bsz))
freqs_cis[i].append(out[1][i])
start_t = out[2]
leftover_cap = ref_contexts[len(ref_latents):]
H, W = x.shape[-2], x.shape[-1]
img_sizes = [(H, W)] * bsz
out = self.embed_all(x, cap_feats, main_siglip, offset=start_t, omni=omni, transformer_options=transformer_options)
img_len = out[0][-1].shape[1]
cap_len = out[0][0].shape[1]
for i, e in enumerate(out[0]):
if e is not None:
e = comfy.utils.repeat_to_batch_size(e, bsz)
embeds[i].append(e)
freqs_cis[i].append(out[1][i])
start_t = out[2]
for cap in leftover_cap:
out = self.embed_cap(cap, offset=start_t, bsz=bsz, device=x.device, dtype=x.dtype)
cap_len += out[0][0].shape[1]
embeds[0].append(comfy.utils.repeat_to_batch_size(out[0][0], bsz))
freqs_cis[0].append(out[1][0])
start_t += out[2]
patches = transformer_options.get("patches", {})
# refine context
cap_feats = torch.cat(embeds[0], dim=1)
cap_freqs_cis = torch.cat(freqs_cis[0], dim=1)
for layer in self.context_refiner:
cap_feats = layer(cap_feats, cap_mask, freqs_cis[:, :cap_pos_ids.shape[1]], transformer_options=transformer_options)
cap_feats = layer(cap_feats, cap_mask, cap_freqs_cis, transformer_options=transformer_options)
feats = (cap_feats,)
fc = (cap_freqs_cis,)
if omni and len(embeds[1]) > 0:
siglip_mask = None
siglip_feats_combined = torch.cat(embeds[1], dim=1)
siglip_feats_freqs_cis = torch.cat(freqs_cis[1], dim=1)
if self.siglip_refiner is not None:
for layer in self.siglip_refiner:
siglip_feats_combined = layer(siglip_feats_combined, siglip_mask, siglip_feats_freqs_cis, transformer_options=transformer_options)
feats += (siglip_feats_combined,)
fc += (siglip_feats_freqs_cis,)
padded_img_mask = None
x = torch.cat(embeds[-1], dim=1)
fc_x = torch.cat(freqs_cis[-1], dim=1)
if omni:
timestep_zero_index = [(x.shape[1] - img_len, x.shape[1])]
else:
timestep_zero_index = None
x_input = x
for i, layer in enumerate(self.noise_refiner):
x = layer(x, padded_img_mask, freqs_cis[:, cap_pos_ids.shape[1]:], t, transformer_options=transformer_options)
x = layer(x, padded_img_mask, fc_x, t, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options)
if "noise_refiner" in patches:
for p in patches["noise_refiner"]:
out = p({"img": x, "img_input": x_input, "txt": cap_feats, "pe": freqs_cis[:, cap_pos_ids.shape[1]:], "vec": t, "x": orig_x, "block_index": i, "transformer_options": transformer_options, "block_type": "noise_refiner"})
out = p({"img": x, "img_input": x_input, "txt": cap_feats, "pe": fc_x, "vec": t, "x": orig_x, "block_index": i, "transformer_options": transformer_options, "block_type": "noise_refiner"})
if "img" in out:
x = out["img"]
padded_full_embed = torch.cat((cap_feats, x), dim=1)
padded_full_embed = torch.cat(feats + (x,), dim=1)
if timestep_zero_index is not None:
ind = padded_full_embed.shape[1] - x.shape[1]
timestep_zero_index = [(ind + x.shape[1] - img_len, ind + x.shape[1])]
timestep_zero_index.append((feats[0].shape[1] - cap_len, feats[0].shape[1]))
mask = None
img_sizes = [(H, W)] * bsz
l_effective_cap_len = [cap_feats.shape[1]] * bsz
return padded_full_embed, mask, img_sizes, l_effective_cap_len, freqs_cis
l_effective_cap_len = [padded_full_embed.shape[1] - img_len] * bsz
return padded_full_embed, mask, img_sizes, l_effective_cap_len, torch.cat(fc + (fc_x,), dim=1), timestep_zero_index
def forward(self, x, timesteps, context, num_tokens, attention_mask=None, **kwargs):
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
@ -604,7 +806,11 @@ class NextDiT(nn.Module):
).execute(x, timesteps, context, num_tokens, attention_mask, **kwargs)
# def forward(self, x, t, cap_feats, cap_mask):
def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, transformer_options={}, **kwargs):
def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, ref_latents=[], ref_contexts=[], siglip_feats=[], transformer_options={}, **kwargs):
omni = len(ref_latents) > 0
if omni:
timesteps = torch.cat([timesteps * 0, timesteps], dim=0)
t = 1.0 - timesteps
cap_feats = context
cap_mask = attention_mask
@ -619,8 +825,6 @@ class NextDiT(nn.Module):
t = self.t_embedder(t * self.time_scale, dtype=x.dtype) # (N, D)
adaln_input = t
cap_feats = self.cap_embedder(cap_feats) # (N, L, D) # todo check if able to batchify w.o. redundant compute
if self.clip_text_pooled_proj is not None:
pooled = kwargs.get("clip_text_pooled", None)
if pooled is not None:
@ -632,7 +836,7 @@ class NextDiT(nn.Module):
patches = transformer_options.get("patches", {})
x_is_tensor = isinstance(x, torch.Tensor)
img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, transformer_options=transformer_options)
img, mask, img_size, cap_size, freqs_cis, timestep_zero_index = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, ref_latents=ref_latents, ref_contexts=ref_contexts, siglip_feats=siglip_feats, transformer_options=transformer_options)
freqs_cis = freqs_cis.to(img.device)
transformer_options["total_blocks"] = len(self.layers)
@ -640,7 +844,7 @@ class NextDiT(nn.Module):
img_input = img
for i, layer in enumerate(self.layers):
transformer_options["block_index"] = i
img = layer(img, mask, freqs_cis, adaln_input, transformer_options=transformer_options)
img = layer(img, mask, freqs_cis, adaln_input, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options)
if "double_block" in patches:
for p in patches["double_block"]:
out = p({"img": img[:, cap_size[0]:], "img_input": img_input[:, cap_size[0]:], "txt": img[:, :cap_size[0]], "pe": freqs_cis[:, cap_size[0]:], "vec": adaln_input, "x": x, "block_index": i, "transformer_options": transformer_options})
@ -649,8 +853,7 @@ class NextDiT(nn.Module):
if "txt" in out:
img[:, :cap_size[0]] = out["txt"]
img = self.final_layer(img, adaln_input)
img = self.final_layer(img, adaln_input, timestep_zero_index=timestep_zero_index)
img = self.unpatchify(img, img_size, cap_size, return_tensor=x_is_tensor)[:, :, :h, :w]
return -img

View File

@ -1150,6 +1150,7 @@ class CosmosPredict2(BaseModel):
class Lumina2(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)
self.memory_usage_factor_conds = ("ref_latents",)
def extra_conds(self, **kwargs):
out = super().extra_conds(**kwargs)
@ -1169,6 +1170,35 @@ class Lumina2(BaseModel):
if clip_text_pooled is not None:
out['clip_text_pooled'] = comfy.conds.CONDRegular(clip_text_pooled)
clip_vision_outputs = kwargs.get("clip_vision_outputs", list(map(lambda a: a.get("clip_vision_output"), kwargs.get("unclip_conditioning", [{}])))) # Z Image omni
if clip_vision_outputs is not None and len(clip_vision_outputs) > 0:
sigfeats = []
for clip_vision_output in clip_vision_outputs:
if clip_vision_output is not None:
image_size = clip_vision_output.image_sizes[0]
shape = clip_vision_output.last_hidden_state.shape
sigfeats.append(clip_vision_output.last_hidden_state.reshape(shape[0], image_size[1] // 16, image_size[2] // 16, shape[-1]))
if len(sigfeats) > 0:
out['siglip_feats'] = comfy.conds.CONDList(sigfeats)
ref_latents = kwargs.get("reference_latents", None)
if ref_latents is not None:
latents = []
for lat in ref_latents:
latents.append(self.process_latent_in(lat))
out['ref_latents'] = comfy.conds.CONDList(latents)
ref_contexts = kwargs.get("reference_latents_text_embeds", None)
if ref_contexts is not None:
out['ref_contexts'] = comfy.conds.CONDList(ref_contexts)
return out
def extra_conds_shapes(self, **kwargs):
out = {}
ref_latents = kwargs.get("reference_latents", None)
if ref_latents is not None:
out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()[2:]), ref_latents))])
return out
class WAN21(BaseModel):

View File

@ -446,6 +446,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["time_scale"] = 1000.0
if '{}cap_pad_token'.format(key_prefix) in state_dict_keys:
dit_config["pad_tokens_multiple"] = 32
sig_weight = state_dict.get('{}siglip_embedder.0.weight'.format(key_prefix), None)
if sig_weight is not None:
dit_config["siglip_feat_dim"] = sig_weight.shape[0]
return dit_config

View File

@ -61,6 +61,7 @@ def te(dtype_llama=None, llama_quantization_metadata=None):
if dtype_llama is not None:
dtype = dtype_llama
if llama_quantization_metadata is not None:
model_options = model_options.copy()
model_options["quantization_metadata"] = llama_quantization_metadata
super().__init__(device=device, dtype=dtype, model_options=model_options)
return OvisTEModel_

View File

@ -40,6 +40,7 @@ def te(dtype_llama=None, llama_quantization_metadata=None):
if dtype_llama is not None:
dtype = dtype_llama
if llama_quantization_metadata is not None:
model_options = model_options.copy()
model_options["quantization_metadata"] = llama_quantization_metadata
super().__init__(device=device, dtype=dtype, model_options=model_options)
return ZImageTEModel_

View File

@ -639,6 +639,8 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
"proj_out.bias": "linear2.bias",
"attn.norm_q.weight": "norm.query_norm.scale",
"attn.norm_k.weight": "norm.key_norm.scale",
"attn.to_qkv_mlp_proj.weight": "linear1.weight", # Flux 2
"attn.to_out.weight": "linear2.weight", # Flux 2
}
for k in block_map:

View File

@ -1000,20 +1000,38 @@ class Autogrow(ComfyTypeI):
names = [f"{prefix}{i}" for i in range(max)]
# need to create a new input based on the contents of input
template_input = None
for _, dict_input in input.items():
# for now, get just the first value from dict_input
template_required = True
for _input_type, dict_input in input.items():
# for now, get just the first value from dict_input; if not required, min can be ignored
if len(dict_input) == 0:
continue
template_input = list(dict_input.values())[0]
template_required = _input_type == "required"
break
if template_input is None:
raise Exception("template_input could not be determined from required or optional; this should never happen.")
new_dict = {}
new_dict_added_to = False
# first, add possible inputs into out_dict
for i, name in enumerate(names):
expected_id = finalize_prefix(curr_prefix, name)
# required
if i < min and template_required:
out_dict["required"][expected_id] = template_input
type_dict = new_dict.setdefault("required", {})
# optional
else:
out_dict["optional"][expected_id] = template_input
type_dict = new_dict.setdefault("optional", {})
if expected_id in live_inputs:
# required
if i < min:
type_dict = new_dict.setdefault("required", {})
# optional
else:
type_dict = new_dict.setdefault("optional", {})
# NOTE: prefix gets added in parse_class_inputs
type_dict[name] = template_input
new_dict_added_to = True
# account for the edge case that all inputs are optional and no values are received
if not new_dict_added_to:
finalized_prefix = finalize_prefix(curr_prefix)
out_dict["dynamic_paths"][finalized_prefix] = finalized_prefix
out_dict["dynamic_paths_default_value"][finalized_prefix] = DynamicPathsDefaultValue.EMPTY_DICT
parse_class_inputs(out_dict, live_inputs, new_dict, curr_prefix)
@comfytype(io_type="COMFY_DYNAMICCOMBO_V3")
@ -1151,6 +1169,8 @@ class V3Data(TypedDict):
'Dictionary where the keys are the hidden input ids and the values are the values of the hidden inputs.'
dynamic_paths: dict[str, Any]
'Dictionary where the keys are the input ids and the values dictate how to turn the inputs into a nested dictionary.'
dynamic_paths_default_value: dict[str, Any]
'Dictionary where the keys are the input ids and the values are a string from DynamicPathsDefaultValue for the inputs if value is None.'
create_dynamic_tuple: bool
'When True, the value of the dynamic input will be in the format (value, path_key).'
@ -1504,6 +1524,7 @@ def get_finalized_class_inputs(d: dict[str, Any], live_inputs: dict[str, Any], i
"required": {},
"optional": {},
"dynamic_paths": {},
"dynamic_paths_default_value": {},
}
d = d.copy()
# ignore hidden for parsing
@ -1513,8 +1534,12 @@ def get_finalized_class_inputs(d: dict[str, Any], live_inputs: dict[str, Any], i
out_dict["hidden"] = hidden
v3_data = {}
dynamic_paths = out_dict.pop("dynamic_paths", None)
if dynamic_paths is not None:
if dynamic_paths is not None and len(dynamic_paths) > 0:
v3_data["dynamic_paths"] = dynamic_paths
# this list is used for autogrow, in the case all inputs are optional and no values are passed
dynamic_paths_default_value = out_dict.pop("dynamic_paths_default_value", None)
if dynamic_paths_default_value is not None and len(dynamic_paths_default_value) > 0:
v3_data["dynamic_paths_default_value"] = dynamic_paths_default_value
return out_dict, hidden, v3_data
def parse_class_inputs(out_dict: dict[str, Any], live_inputs: dict[str, Any], curr_dict: dict[str, Any], curr_prefix: list[str] | None=None) -> None:
@ -1551,11 +1576,16 @@ def add_to_dict_v1(i: Input, d: dict):
def add_to_dict_v3(io: Input | Output, d: dict):
d[io.id] = (io.get_io_type(), io.as_dict())
class DynamicPathsDefaultValue:
EMPTY_DICT = "empty_dict"
def build_nested_inputs(values: dict[str, Any], v3_data: V3Data):
paths = v3_data.get("dynamic_paths", None)
default_value_dict = v3_data.get("dynamic_paths_default_value", {})
if paths is None:
return values
values = values.copy()
result = {}
create_tuple = v3_data.get("create_dynamic_tuple", False)
@ -1569,6 +1599,11 @@ def build_nested_inputs(values: dict[str, Any], v3_data: V3Data):
if is_last:
value = values.pop(key, None)
if value is None:
# see if a default value was provided for this key
default_option = default_value_dict.get(key, None)
if default_option == DynamicPathsDefaultValue.EMPTY_DICT:
value = {}
if create_tuple:
value = (value, key)
current[p] = value

View File

@ -0,0 +1,61 @@
from typing import TypedDict
from pydantic import BaseModel, Field
class InputModerationSettings(TypedDict):
prompt_content_moderation: bool
visual_input_moderation: bool
visual_output_moderation: bool
class BriaEditImageRequest(BaseModel):
instruction: str | None = Field(...)
structured_instruction: str | None = Field(
...,
description="Use this instead of instruction for precise, programmatic control.",
)
images: list[str] = Field(
...,
description="Required. Publicly available URL or Base64-encoded. Must contain exactly one item.",
)
mask: str | None = Field(
None,
description="Mask image (black and white). Black areas will be preserved, white areas will be edited. "
"If omitted, the edit applies to the entire image. "
"The input image and the the input mask must be of the same size.",
)
negative_prompt: str | None = Field(None)
guidance_scale: float = Field(...)
model_version: str = Field(...)
steps_num: int = Field(...)
seed: int = Field(...)
ip_signal: bool = Field(
False,
description="If true, returns a warning for potential IP content in the instruction.",
)
prompt_content_moderation: bool = Field(
False, description="If true, returns 422 on instruction moderation failure."
)
visual_input_content_moderation: bool = Field(
False, description="If true, returns 422 on images or mask moderation failure."
)
visual_output_content_moderation: bool = Field(
False, description="If true, returns 422 on visual output moderation failure."
)
class BriaStatusResponse(BaseModel):
request_id: str = Field(...)
status_url: str = Field(...)
warning: str | None = Field(None)
class BriaResult(BaseModel):
structured_prompt: str = Field(...)
image_url: str = Field(...)
class BriaResponse(BaseModel):
status: str = Field(...)
result: BriaResult | None = Field(None)

View File

@ -0,0 +1,198 @@
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.bria import (
BriaEditImageRequest,
BriaResponse,
BriaStatusResponse,
InputModerationSettings,
)
from comfy_api_nodes.util import (
ApiEndpoint,
convert_mask_to_image,
download_url_to_image_tensor,
get_number_of_images,
poll_op,
sync_op,
upload_images_to_comfyapi,
)
class BriaImageEditNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="BriaImageEditNode",
display_name="Bria Image Edit",
category="api node/image/Bria",
description="Edit images using Bria latest model",
inputs=[
IO.Combo.Input("model", options=["FIBO"]),
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Instruction to edit image",
),
IO.String.Input("negative_prompt", multiline=True, default=""),
IO.String.Input(
"structured_prompt",
multiline=True,
default="",
tooltip="A string containing the structured edit prompt in JSON format. "
"Use this instead of usual prompt for precise, programmatic control.",
),
IO.Int.Input(
"seed",
default=1,
min=1,
max=2147483647,
step=1,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
),
IO.Float.Input(
"guidance_scale",
default=3,
min=3,
max=5,
step=0.01,
display_mode=IO.NumberDisplay.number,
tooltip="Higher value makes the image follow the prompt more closely.",
),
IO.Int.Input(
"steps",
default=50,
min=20,
max=50,
step=1,
display_mode=IO.NumberDisplay.number,
),
IO.DynamicCombo.Input(
"moderation",
options=[
IO.DynamicCombo.Option(
"true",
[
IO.Boolean.Input(
"prompt_content_moderation", default=False
),
IO.Boolean.Input(
"visual_input_moderation", default=False
),
IO.Boolean.Input(
"visual_output_moderation", default=True
),
],
),
IO.DynamicCombo.Option("false", []),
],
tooltip="Moderation settings",
),
IO.Mask.Input(
"mask",
tooltip="If omitted, the edit applies to the entire image.",
optional=True,
),
],
outputs=[
IO.Image.Output(),
IO.String.Output(display_name="structured_prompt"),
],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
expr="""{"type":"usd","usd":0.04}""",
),
)
@classmethod
async def execute(
cls,
model: str,
image: Input.Image,
prompt: str,
negative_prompt: str,
structured_prompt: str,
seed: int,
guidance_scale: float,
steps: int,
moderation: InputModerationSettings,
mask: Input.Image | None = None,
) -> IO.NodeOutput:
if not prompt and not structured_prompt:
raise ValueError(
"One of prompt or structured_prompt is required to be non-empty."
)
if get_number_of_images(image) != 1:
raise ValueError("Exactly one input image is required.")
mask_url = None
if mask is not None:
mask_url = (
await upload_images_to_comfyapi(
cls,
convert_mask_to_image(mask),
max_images=1,
mime_type="image/png",
wait_label="Uploading mask",
)
)[0]
response = await sync_op(
cls,
ApiEndpoint(path="proxy/bria/v2/image/edit", method="POST"),
data=BriaEditImageRequest(
instruction=prompt if prompt else None,
structured_instruction=structured_prompt if structured_prompt else None,
images=await upload_images_to_comfyapi(
cls,
image,
max_images=1,
mime_type="image/png",
wait_label="Uploading image",
),
mask=mask_url,
negative_prompt=negative_prompt if negative_prompt else None,
guidance_scale=guidance_scale,
seed=seed,
model_version=model,
steps_num=steps,
prompt_content_moderation=moderation.get(
"prompt_content_moderation", False
),
visual_input_content_moderation=moderation.get(
"visual_input_moderation", False
),
visual_output_content_moderation=moderation.get(
"visual_output_moderation", False
),
),
response_model=BriaStatusResponse,
)
response = await poll_op(
cls,
ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
status_extractor=lambda r: r.status,
response_model=BriaResponse,
)
return IO.NodeOutput(
await download_url_to_image_tensor(response.result.image_url),
response.result.structured_prompt,
)
class BriaExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
BriaImageEditNode,
]
async def comfy_entrypoint() -> BriaExtension:
return BriaExtension()

View File

@ -11,6 +11,7 @@ from .conversions import (
audio_input_to_mp3,
audio_to_base64_string,
bytesio_to_image_tensor,
convert_mask_to_image,
downscale_image_tensor,
image_tensor_pair_to_batch,
pil_to_bytesio,
@ -72,6 +73,7 @@ __all__ = [
"audio_input_to_mp3",
"audio_to_base64_string",
"bytesio_to_image_tensor",
"convert_mask_to_image",
"downscale_image_tensor",
"image_tensor_pair_to_batch",
"pil_to_bytesio",

View File

@ -451,6 +451,12 @@ def resize_mask_to_image(
return mask
def convert_mask_to_image(mask: Input.Image) -> torch.Tensor:
"""Make mask have the expected amount of dims (4) and channels (3) to be recognized as an image."""
mask = mask.unsqueeze(-1)
return torch.cat([mask] * 3, dim=-1)
def text_filepath_to_base64_string(filepath: str) -> str:
"""Converts a text file to a base64 string."""
with open(filepath, "rb") as f:

View File

@ -0,0 +1,88 @@
import node_helpers
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
import math
import comfy.utils
class TextEncodeZImageOmni(io.ComfyNode):
@classmethod
def define_schema(cls):
return io.Schema(
node_id="TextEncodeZImageOmni",
category="advanced/conditioning",
is_experimental=True,
inputs=[
io.Clip.Input("clip"),
io.ClipVision.Input("image_encoder", optional=True),
io.String.Input("prompt", multiline=True, dynamic_prompts=True),
io.Boolean.Input("auto_resize_images", default=True),
io.Vae.Input("vae", optional=True),
io.Image.Input("image1", optional=True),
io.Image.Input("image2", optional=True),
io.Image.Input("image3", optional=True),
],
outputs=[
io.Conditioning.Output(),
],
)
@classmethod
def execute(cls, clip, prompt, image_encoder=None, auto_resize_images=True, vae=None, image1=None, image2=None, image3=None) -> io.NodeOutput:
ref_latents = []
images = list(filter(lambda a: a is not None, [image1, image2, image3]))
prompt_list = []
template = None
if len(images) > 0:
prompt_list = ["<|im_start|>user\n<|vision_start|>"]
prompt_list += ["<|vision_end|><|vision_start|>"] * (len(images) - 1)
prompt_list += ["<|vision_end|><|im_end|>"]
template = "<|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n<|vision_start|>"
encoded_images = []
for i, image in enumerate(images):
if image_encoder is not None:
encoded_images.append(image_encoder.encode_image(image))
if vae is not None:
if auto_resize_images:
samples = image.movedim(-1, 1)
total = int(1024 * 1024)
scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
width = round(samples.shape[3] * scale_by / 8.0) * 8
height = round(samples.shape[2] * scale_by / 8.0) * 8
image = comfy.utils.common_upscale(samples, width, height, "area", "disabled").movedim(1, -1)
ref_latents.append(vae.encode(image))
tokens = clip.tokenize(prompt, llama_template=template)
conditioning = clip.encode_from_tokens_scheduled(tokens)
extra_text_embeds = []
for p in prompt_list:
tokens = clip.tokenize(p, llama_template="{}")
text_embeds = clip.encode_from_tokens_scheduled(tokens)
extra_text_embeds.append(text_embeds[0][0])
if len(ref_latents) > 0:
conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": ref_latents}, append=True)
if len(encoded_images) > 0:
conditioning = node_helpers.conditioning_set_values(conditioning, {"clip_vision_outputs": encoded_images}, append=True)
if len(extra_text_embeds) > 0:
conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents_text_embeds": extra_text_embeds}, append=True)
return io.NodeOutput(conditioning)
class ZImageExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
TextEncodeZImageOmni,
]
async def comfy_entrypoint() -> ZImageExtension:
return ZImageExtension()

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.9.2"
__version__ = "0.10.0"

View File

@ -2373,6 +2373,7 @@ async def init_builtin_extra_nodes():
"nodes_kandinsky5.py",
"nodes_wanmove.py",
"nodes_image_compare.py",
"nodes_zimage.py",
]
import_failed = []

View File

@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.9.2"
version = "0.10.0"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.10"

View File

@ -1,5 +1,5 @@
comfyui-frontend-package==1.36.14
comfyui-workflow-templates==0.8.11
comfyui-frontend-package==1.37.11
comfyui-workflow-templates==0.8.15
comfyui-embedded-docs==0.4.0
torch
torchsde

View File

@ -0,0 +1,321 @@
"""Tests for the logger module, specifically LogInterceptor."""
import io
import pytest
from unittest.mock import MagicMock
class TestLogInterceptorFlush:
"""Test that LogInterceptor.flush() handles OSError gracefully."""
def test_flush_handles_errno_22(self):
"""Test that flush() catches OSError with errno 22 and still executes callbacks."""
# We can't easily mock the parent flush, so we test the behavior by
# creating a LogInterceptor and verifying the flush method exists
# with the try-except structure.
# Read the source to verify the fix is in place
import inspect
from app.logger import LogInterceptor
source = inspect.getsource(LogInterceptor.flush)
# Verify the try-except structure is present
assert 'try:' in source
assert 'super().flush()' in source
assert 'except OSError as e:' in source
assert 'e.errno != 22' in source or 'e.errno == 22' in source
def test_flush_callback_execution(self):
"""Test that flush callbacks are executed."""
from app.logger import LogInterceptor
# Create a proper stream for LogInterceptor
import sys
# Use a StringIO-based approach with a real buffer
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Register a callback
callback_results = []
interceptor.on_flush(lambda logs: callback_results.append(len(logs)))
# Add some logs
interceptor._logs_since_flush = [
{"t": "test", "m": "message1"},
{"t": "test", "m": "message2"}
]
# Flush should execute callback
interceptor.flush()
assert len(callback_results) == 1
assert callback_results[0] == 2 # Two log entries
def test_flush_clears_logs_after_callback(self):
"""Test that logs are cleared after flush callbacks."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Add a dummy callback
interceptor.on_flush(lambda logs: None)
# Add some logs
interceptor._logs_since_flush = [{"t": "test", "m": "message"}]
# Flush
interceptor.flush()
# Logs should be cleared
assert interceptor._logs_since_flush == []
def test_flush_multiple_callbacks_receive_same_logs(self):
"""Test that all callbacks receive the same logs, not just the first one."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Register multiple callbacks
callback1_results = []
callback2_results = []
callback3_results = []
interceptor.on_flush(lambda logs: callback1_results.append(len(logs)))
interceptor.on_flush(lambda logs: callback2_results.append(len(logs)))
interceptor.on_flush(lambda logs: callback3_results.append(len(logs)))
# Add some logs
interceptor._logs_since_flush = [
{"t": "test", "m": "message1"},
{"t": "test", "m": "message2"},
{"t": "test", "m": "message3"}
]
# Flush should execute all callbacks with the same logs
interceptor.flush()
# All callbacks should have received 3 log entries
assert callback1_results == [3]
assert callback2_results == [3]
assert callback3_results == [3]
def test_flush_preserves_logs_when_callback_raises(self):
"""Test that logs are preserved for retry if a callback raises an exception."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Register a callback that raises
def raising_callback(logs):
raise ValueError("Callback error")
interceptor.on_flush(raising_callback)
# Add some logs
original_logs = [
{"t": "test", "m": "message1"},
{"t": "test", "m": "message2"}
]
interceptor._logs_since_flush = original_logs.copy()
# Flush should raise
with pytest.raises(ValueError, match="Callback error"):
interceptor.flush()
# Logs should be preserved for retry on next flush
assert interceptor._logs_since_flush == original_logs
def test_flush_protects_logs_from_callback_mutation(self):
"""Test that callback mutations don't affect preserved logs on failure."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# First callback mutates the list, second raises
def mutating_callback(logs):
logs.clear() # Mutate the passed list
def raising_callback(logs):
raise ValueError("Callback error")
interceptor.on_flush(mutating_callback)
interceptor.on_flush(raising_callback)
# Add some logs
original_logs = [
{"t": "test", "m": "message1"},
{"t": "test", "m": "message2"}
]
interceptor._logs_since_flush = original_logs.copy()
# Flush should raise
with pytest.raises(ValueError, match="Callback error"):
interceptor.flush()
# Logs should be preserved despite mutation by first callback
assert interceptor._logs_since_flush == original_logs
def test_flush_clears_logs_after_all_callbacks_succeed(self):
"""Test that logs are cleared only after all callbacks execute successfully."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Register multiple callbacks
callback1_results = []
callback2_results = []
interceptor.on_flush(lambda logs: callback1_results.append(len(logs)))
interceptor.on_flush(lambda logs: callback2_results.append(len(logs)))
# Add some logs
interceptor._logs_since_flush = [
{"t": "test", "m": "message1"},
{"t": "test", "m": "message2"}
]
# Flush should succeed
interceptor.flush()
# All callbacks should have executed
assert callback1_results == [2]
assert callback2_results == [2]
# Logs should be cleared after success
assert interceptor._logs_since_flush == []
class TestLogInterceptorWrite:
"""Test that LogInterceptor.write() works correctly."""
def test_write_adds_to_logs(self):
"""Test that write() adds entries to the log buffer."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Initialize the global logs
import app.logger
from collections import deque
app.logger.logs = deque(maxlen=100)
# Write a message
interceptor.write("test message")
# Check that it was added to _logs_since_flush
assert len(interceptor._logs_since_flush) == 1
assert interceptor._logs_since_flush[0]["m"] == "test message"
def test_write_enforces_max_pending_logs(self):
"""Test that write() enforces MAX_PENDING_LOGS to prevent OOM."""
from app.logger import LogInterceptor
class MockStream:
def __init__(self):
self._buffer = io.BytesIO()
self.encoding = 'utf-8'
self.line_buffering = False
@property
def buffer(self):
return self._buffer
mock_stream = MockStream()
interceptor = LogInterceptor(mock_stream)
# Initialize the global logs
import app.logger
from collections import deque
app.logger.logs = deque(maxlen=100)
# Manually set _logs_since_flush to be at the limit
interceptor._logs_since_flush = [
{"t": "test", "m": f"old_message_{i}"}
for i in range(LogInterceptor.MAX_PENDING_LOGS)
]
# Write one more message - should trigger trimming
interceptor.write("new_message")
# Should still be at MAX_PENDING_LOGS, oldest dropped
assert len(interceptor._logs_since_flush) == LogInterceptor.MAX_PENDING_LOGS
# The new message should be at the end
assert interceptor._logs_since_flush[-1]["m"] == "new_message"
# The oldest message should have been dropped (old_message_0)
assert interceptor._logs_since_flush[0]["m"] == "old_message_1"