Merge branch 'master' into matt/be-1452-core-jobs-namespace-cancel

This commit is contained in:
Matt Miller 2026-06-18 10:39:18 -07:00
commit 7226d5890e
14 changed files with 864 additions and 97 deletions

View File

@ -140,7 +140,7 @@ ComfyUI follows a weekly release cycle targeting Monday but this regularly chang
- Commits outside of the stable release tags may be very unstable and break many custom nodes.
- Serves as the foundation for the desktop release
2. **[ComfyUI Desktop](https://github.com/Comfy-Org/Comfy-Desktop)**
2. **[Comfy Desktop](https://github.com/Comfy-Org/Comfy-Desktop)**
- Builds a new release using the latest stable core version
3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)**

321
comfy/ldm/boogu/model.py Normal file
View File

@ -0,0 +1,321 @@
# Boogu-Image-0.1 transformer
# Architecture is an OmniGen2 derivative (see comfy/ldm/omnigen/omnigen2.py) with an
# added dual-stream ("double_stream") stage before the single-stream layers, conditioned
# by a Qwen3-VL multimodal LLM. Reuses the OmniGen2/Lumina building blocks and the Flux
# RoPE core, the only new component is the double-stream block + the hybrid forward order.
from typing import Optional, Tuple
import torch
import torch.nn as nn
from einops import rearrange
import comfy.ldm.common_dit
import comfy.ldm.omnigen.omnigen2
from comfy.ldm.modules.attention import optimized_attention_masked
from comfy.ldm.omnigen.omnigen2 import (
OmniGen2RotaryPosEmbed,
Lumina2CombinedTimestepCaptionEmbedding,
LuminaRMSNormZero,
LuminaLayerNormContinuous,
LuminaFeedForward,
Attention,
OmniGen2TransformerBlock,
apply_rotary_emb,
)
class BooguDoubleStreamProcessor(nn.Module):
# Joint attention over [instruct ; img] with separate per-stream q/k/v and output projections.
def __init__(self, dim, head_dim, heads, kv_heads, dtype=None, device=None, operations=None):
super().__init__()
query_dim = head_dim * heads
kv_dim = head_dim * kv_heads
self.img_to_q = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device)
self.img_to_k = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device)
self.img_to_v = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device)
self.instruct_to_q = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device)
self.instruct_to_k = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device)
self.instruct_to_v = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device)
self.instruct_out = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device)
self.img_out = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device)
def forward(self, attn, img_hidden_states, instruct_hidden_states, rotary_emb, attention_mask=None, transformer_options={}):
batch_size = img_hidden_states.shape[0]
L_instruct = instruct_hidden_states.shape[1]
img_q = self.img_to_q(img_hidden_states)
img_k = self.img_to_k(img_hidden_states)
img_v = self.img_to_v(img_hidden_states)
instruct_q = self.instruct_to_q(instruct_hidden_states)
instruct_k = self.instruct_to_k(instruct_hidden_states)
instruct_v = self.instruct_to_v(instruct_hidden_states)
# Concatenate instruction first, then image (matches reference processor order).
query = torch.cat([instruct_q, img_q], dim=1)
key = torch.cat([instruct_k, img_k], dim=1)
value = torch.cat([instruct_v, img_v], dim=1)
query = query.view(batch_size, -1, attn.heads, attn.dim_head)
key = key.view(batch_size, -1, attn.kv_heads, attn.dim_head)
value = value.view(batch_size, -1, attn.kv_heads, attn.dim_head)
query = attn.norm_q(query)
key = attn.norm_k(key)
if rotary_emb is not None:
query = apply_rotary_emb(query, rotary_emb)
key = apply_rotary_emb(key, rotary_emb)
query = query.transpose(1, 2)
key = key.transpose(1, 2)
value = value.transpose(1, 2)
if attn.kv_heads < attn.heads:
key = key.repeat_interleave(attn.heads // attn.kv_heads, dim=1)
value = value.repeat_interleave(attn.heads // attn.kv_heads, dim=1)
hidden_states = optimized_attention_masked(query, key, value, attn.heads, attention_mask, skip_reshape=True, transformer_options=transformer_options)
# Split back to instruction/image, apply per-stream output projections, recombine.
instruct_hidden_states = self.instruct_out(hidden_states[:, :L_instruct])
img_hidden_states = self.img_out(hidden_states[:, L_instruct:])
hidden_states = torch.cat([instruct_hidden_states, img_hidden_states], dim=1)
hidden_states = attn.to_out[0](hidden_states)
return hidden_states
class BooguJointAttention(nn.Module):
# Holds the shared q/k RMSNorm + final output projection
def __init__(self, dim, head_dim, heads, kv_heads, eps=1e-5, dtype=None, device=None, operations=None):
super().__init__()
self.heads = heads
self.kv_heads = kv_heads
self.dim_head = head_dim
self.scale = head_dim ** -0.5
self.norm_q = operations.RMSNorm(head_dim, eps=eps, dtype=dtype, device=device)
self.norm_k = operations.RMSNorm(head_dim, eps=eps, dtype=dtype, device=device)
self.to_out = nn.Sequential(
operations.Linear(heads * head_dim, dim, bias=False, dtype=dtype, device=device),
nn.Dropout(0.0),
)
self.processor = BooguDoubleStreamProcessor(dim, head_dim, heads, kv_heads, dtype=dtype, device=device, operations=operations)
def forward(self, img_hidden_states, instruct_hidden_states, rotary_emb, attention_mask=None, transformer_options={}):
return self.processor(self, img_hidden_states, instruct_hidden_states, rotary_emb, attention_mask, transformer_options=transformer_options)
class BooguDoubleStreamBlock(nn.Module):
# Dual-stream block: joint attention over [instruct ; img] + image self-attention, each stream with its own modulation/MLP.
def __init__(self, dim, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, dtype=None, device=None, operations=None):
super().__init__()
head_dim = dim // num_attention_heads
self.img_instruct_attn = BooguJointAttention(dim, head_dim, num_attention_heads, num_kv_heads, eps=1e-5, dtype=dtype, device=device, operations=operations)
self.img_self_attn = Attention(
query_dim=dim, dim_head=head_dim, heads=num_attention_heads, kv_heads=num_kv_heads,
eps=1e-5, bias=False, dtype=dtype, device=device, operations=operations,
)
self.img_feed_forward = LuminaFeedForward(dim=dim, inner_dim=4 * dim, multiple_of=multiple_of, dtype=dtype, device=device, operations=operations)
self.instruct_feed_forward = LuminaFeedForward(dim=dim, inner_dim=4 * dim, multiple_of=multiple_of, dtype=dtype, device=device, operations=operations)
self.img_norm1 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
self.img_norm2 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
self.img_norm3 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
self.instruct_norm1 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
self.instruct_norm2 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
self.img_attn_norm = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
self.img_self_attn_norm = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
self.img_ffn_norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
self.img_ffn_norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
self.instruct_attn_norm = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
self.instruct_ffn_norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
self.instruct_ffn_norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
def forward(self, img_hidden_states, instruct_hidden_states, joint_rotary_emb, img_rotary_emb, temb, joint_attention_mask=None, img_attention_mask=None, transformer_options={}):
L_instruct = instruct_hidden_states.shape[1]
img_norm1_out, img_gate_msa, img_scale_mlp, img_gate_mlp = self.img_norm1(img_hidden_states, temb)
img_norm2_out, img_shift_mlp, _, _ = self.img_norm2(img_hidden_states, temb)
img_norm3_out, img_gate_self, _, _ = self.img_norm3(img_hidden_states, temb)
instruct_norm1_out, instruct_gate_msa, instruct_scale_mlp, instruct_gate_mlp = self.instruct_norm1(instruct_hidden_states, temb)
instruct_norm2_out, instruct_shift_mlp, _, _ = self.instruct_norm2(instruct_hidden_states, temb)
joint_attn_out = self.img_instruct_attn(img_norm1_out, instruct_norm1_out, joint_rotary_emb, joint_attention_mask, transformer_options=transformer_options)
instruct_attn_out = joint_attn_out[:, :L_instruct]
img_attn_out = joint_attn_out[:, L_instruct:]
img_self_attn_out = self.img_self_attn(img_norm3_out, img_norm3_out, img_attention_mask, img_rotary_emb, transformer_options=transformer_options)
img_hidden_states = img_hidden_states + img_gate_msa.unsqueeze(1).tanh() * self.img_attn_norm(img_attn_out)
img_hidden_states = img_hidden_states + img_gate_self.unsqueeze(1).tanh() * self.img_self_attn_norm(img_self_attn_out)
img_mlp_input = (1 + img_scale_mlp.unsqueeze(1)) * img_norm2_out + img_shift_mlp.unsqueeze(1)
img_mlp_out = self.img_feed_forward(self.img_ffn_norm1(img_mlp_input))
img_hidden_states = img_hidden_states + img_gate_mlp.unsqueeze(1).tanh() * self.img_ffn_norm2(img_mlp_out)
instruct_hidden_states = instruct_hidden_states + instruct_gate_msa.unsqueeze(1).tanh() * self.instruct_attn_norm(instruct_attn_out)
instruct_mlp_input = (1 + instruct_scale_mlp.unsqueeze(1)) * instruct_norm2_out + instruct_shift_mlp.unsqueeze(1)
instruct_mlp_out = self.instruct_feed_forward(self.instruct_ffn_norm1(instruct_mlp_input))
instruct_hidden_states = instruct_hidden_states + instruct_gate_mlp.unsqueeze(1).tanh() * self.instruct_ffn_norm2(instruct_mlp_out)
return img_hidden_states, instruct_hidden_states
class BooguTransformer2DModel(nn.Module):
def __init__(
self,
patch_size: int = 2,
in_channels: int = 16,
out_channels: Optional[int] = None,
hidden_size: int = 3360,
num_layers: int = 32,
num_double_stream_layers: int = 8,
num_refiner_layers: int = 2,
num_attention_heads: int = 28,
num_kv_heads: int = 7,
multiple_of: int = 256,
ffn_dim_multiplier: Optional[float] = None,
norm_eps: float = 1e-5,
axes_dim_rope: Tuple[int, int, int] = (40, 40, 40),
axes_lens: Tuple[int, int, int] = (2048, 1664, 1664),
instruction_feat_dim: int = 4096,
timestep_scale: float = 1000.0,
image_model=None,
device=None, dtype=None, operations=None,
):
super().__init__()
self.patch_size = patch_size
self.out_channels = out_channels or in_channels
self.hidden_size = hidden_size
self.dtype = dtype
self.rope_embedder = OmniGen2RotaryPosEmbed(
theta=10000,
axes_dim=axes_dim_rope,
axes_lens=axes_lens,
patch_size=patch_size,
)
self.x_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device)
self.ref_image_patch_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device)
self.time_caption_embed = Lumina2CombinedTimestepCaptionEmbedding(
hidden_size=hidden_size,
text_feat_dim=instruction_feat_dim,
norm_eps=norm_eps,
timestep_scale=timestep_scale, dtype=dtype, device=device, operations=operations
)
self.noise_refiner = nn.ModuleList([
OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations)
for _ in range(num_refiner_layers)
])
self.ref_image_refiner = nn.ModuleList([
OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations)
for _ in range(num_refiner_layers)
])
self.context_refiner = nn.ModuleList([
OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=False, dtype=dtype, device=device, operations=operations)
for _ in range(num_refiner_layers)
])
self.double_stream_layers = nn.ModuleList([
BooguDoubleStreamBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, dtype=dtype, device=device, operations=operations)
for _ in range(num_double_stream_layers)
])
self.single_stream_layers = nn.ModuleList([
OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations)
for _ in range(num_layers)
])
self.norm_out = LuminaLayerNormContinuous(
embedding_dim=hidden_size,
conditioning_embedding_dim=min(hidden_size, 1024),
elementwise_affine=False,
eps=1e-6,
out_dim=patch_size * patch_size * self.out_channels, dtype=dtype, device=device, operations=operations
)
self.image_index_embedding = nn.Parameter(torch.empty(5, hidden_size, device=device, dtype=dtype))
# Patchify/refine helpers are identical to OmniGen2; reuse via bound methods.
flat_and_pad_to_seq = comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel.flat_and_pad_to_seq
img_patch_embed_and_refine = comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel.img_patch_embed_and_refine
def forward(self, x, timesteps, context, num_tokens, ref_latents=None, attention_mask=None, transformer_options={}, **kwargs):
B, C, H, W = x.shape
hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
_, _, H_padded, W_padded = hidden_states.shape
timestep = 1.0 - timesteps
text_hidden_states = context
text_attention_mask = attention_mask
ref_image_hidden_states = ref_latents
device = hidden_states.device
temb, text_hidden_states = self.time_caption_embed(timestep, text_hidden_states, hidden_states[0].dtype)
(
hidden_states, ref_image_hidden_states,
img_mask, ref_img_mask,
l_effective_ref_img_len, l_effective_img_len,
ref_img_sizes, img_sizes,
) = self.flat_and_pad_to_seq(hidden_states, ref_image_hidden_states)
(
context_rotary_emb, ref_img_rotary_emb, noise_rotary_emb,
rotary_emb, encoder_seq_lengths, seq_lengths,
) = self.rope_embedder(
hidden_states.shape[0], text_hidden_states.shape[1], [num_tokens] * text_hidden_states.shape[0],
l_effective_ref_img_len, l_effective_img_len,
ref_img_sizes, img_sizes, device,
)
for layer in self.context_refiner:
text_hidden_states = layer(text_hidden_states, text_attention_mask, context_rotary_emb, transformer_options=transformer_options)
img_len = hidden_states.shape[1]
combined_img_hidden_states = self.img_patch_embed_and_refine(
hidden_states, ref_image_hidden_states,
img_mask, ref_img_mask,
noise_rotary_emb, ref_img_rotary_emb,
l_effective_ref_img_len, l_effective_img_len,
temb,
transformer_options=transformer_options,
)
# Double-stream stage: the image self-attention only sees the [ref ; noise] tokens,
# which sit after the instruction tokens in the joint rope.
L_instruct = text_hidden_states.shape[1]
combined_img_rotary_emb = rotary_emb[:, L_instruct:]
for layer in self.double_stream_layers:
combined_img_hidden_states, text_hidden_states = layer(
combined_img_hidden_states, text_hidden_states,
rotary_emb, combined_img_rotary_emb, temb,
joint_attention_mask=None, img_attention_mask=None,
transformer_options=transformer_options,
)
hidden_states = torch.cat([text_hidden_states, combined_img_hidden_states], dim=1)
for layer in self.single_stream_layers:
hidden_states = layer(hidden_states, None, rotary_emb, temb, transformer_options=transformer_options)
hidden_states = self.norm_out(hidden_states, temb)
p = self.patch_size
output = rearrange(hidden_states[:, -img_len:], 'b (h w) (p1 p2 c) -> b c (h p1) (w p2)', h=H_padded // p, w=W_padded // p, p1=p, p2=p)[:, :, :H, :W]
return -output

View File

@ -22,7 +22,7 @@ def apply_rotary_emb(x, freqs_cis):
def swiglu(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
return F.silu(x) * y
return F.silu(x, inplace=True).mul_(y)
class TimestepEmbedding(nn.Module):

View File

@ -54,6 +54,7 @@ import comfy.ldm.pixeldit.model
import comfy.ldm.pixeldit.pid
import comfy.ldm.ace.model
import comfy.ldm.omnigen.omnigen2
import comfy.ldm.boogu.model
import comfy.ldm.qwen_image.model
import comfy.ldm.ideogram4.model
import comfy.ldm.kandinsky5.model
@ -2103,6 +2104,11 @@ class Omnigen2(BaseModel):
out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
return out
class Boogu(Omnigen2):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
super(Omnigen2, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.boogu.model.BooguTransformer2DModel)
self.memory_usage_factor_conds = ("ref_latents",)
class QwenImage(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLUX, device=None):
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.qwen_image.model.QwenImageTransformer2DModel)

View File

@ -761,6 +761,16 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
return dit_config
if '{}double_stream_layers.0.img_instruct_attn.processor.img_to_q.weight'.format(key_prefix) in state_dict_keys: # Boogu-Image (OmniGen2 derivative + dual-stream stage)
dit_config = {}
dit_config["image_model"] = "boogu"
dit_config["hidden_size"] = state_dict['{}x_embedder.weight'.format(key_prefix)].shape[0]
dit_config["num_layers"] = count_blocks(state_dict_keys, '{}single_stream_layers.'.format(key_prefix) + '{}.')
dit_config["num_double_stream_layers"] = count_blocks(state_dict_keys, '{}double_stream_layers.'.format(key_prefix) + '{}.')
dit_config["num_refiner_layers"] = count_blocks(state_dict_keys, '{}noise_refiner.'.format(key_prefix) + '{}.')
dit_config["instruction_feat_dim"] = state_dict['{}time_caption_embed.caption_embedder.0.weight'.format(key_prefix)].shape[0]
return dit_config
if '{}time_caption_embed.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys: # Omnigen2
dit_config = {}
dit_config["image_model"] = "omnigen2"

View File

@ -68,6 +68,7 @@ import comfy.text_encoders.ace15
import comfy.text_encoders.longcat_image
import comfy.text_encoders.qwen35
import comfy.text_encoders.qwen3vl
import comfy.text_encoders.boogu
import comfy.text_encoders.ernie
import comfy.text_encoders.gemma4
import comfy.text_encoders.cogvideo
@ -1301,6 +1302,7 @@ class CLIPType(Enum):
LENS = 28
PIXELDIT = 29
IDEOGRAM4 = 30
BOOGU = 31
@ -1622,6 +1624,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."})
clip_target.clip = comfy.text_encoders.ideogram4.te_qwen3vl(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Qwen3VLTokenizer
elif clip_type == CLIPType.BOOGU and te_model == TEModel.QWEN3VL_8B: # Boogu-Image: full Qwen3-VL-8B, last hidden state, no-think template.
clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."})
clip_target.clip = comfy.text_encoders.boogu.te(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.boogu.BooguTokenizer
elif clip_type in (CLIPType.FLUX, CLIPType.FLUX2): # Flux2 Klein reuses the Qwen3-VL LM (3-layer tap -> 12288); visual unused.
klein_model_type = "qwen3_8b" if te_model == TEModel.QWEN3VL_8B else "qwen3_4b"
clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type=klein_model_type)

View File

@ -25,6 +25,7 @@ import comfy.text_encoders.hunyuan_image
import comfy.text_encoders.kandinsky5
import comfy.text_encoders.z_image
import comfy.text_encoders.ideogram4
import comfy.text_encoders.boogu
import comfy.text_encoders.anima
import comfy.text_encoders.ace15
import comfy.text_encoders.longcat_image
@ -1758,6 +1759,27 @@ class Omnigen2(supported_models_base.BASE):
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref))
return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.Omnigen2Tokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect))
class Boogu(Omnigen2):
unet_config = {
"image_model": "boogu",
}
sampling_settings = {
"multiplier": 1.0,
"shift": 3.16,
}
memory_usage_factor = 2.15
def get_model(self, state_dict, prefix="", device=None):
out = model_base.Boogu(self, device=device)
return out
def clip_target(self, state_dict={}):
pref = self.text_encoder_key_prefix[0]
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3vl_8b.transformer.".format(pref))
return supported_models_base.ClipTarget(comfy.text_encoders.boogu.BooguTokenizer, comfy.text_encoders.boogu.te(**hunyuan_detect))
class Ideogram4(supported_models_base.BASE):
unet_config = {
"image_model": "ideogram4",
@ -2300,6 +2322,7 @@ models = [
ACEStep,
ACEStep15,
Omnigen2,
Boogu,
QwenImage,
Ideogram4,
Flux2,

View File

@ -0,0 +1,58 @@
"""Boogu-Image text encoder: full Qwen3-VL-8B, last hidden state (4096-dim).
Boogu uses the final hidden state of Qwen3-VL as the per-token instruction feature
(num_instruction_feature_layers=1, reduce_type=mean -> just the last layer).
The model itself is the standard Qwen3-VL TE, only the chat template differs
(a fixed system prompt and no <think> block).
"""
import comfy.text_encoders.qwen3vl
from comfy import sd1_clip
# System prompts from the reference pipeline (pipeline_boogu.py).
# T2I (non-empty instruction, no image) uses the helpful-assistant prompt
# everything else (the CFG negative / "drop" condition, and any image case) uses the TI2I "describe" prompt.
BOOGU_T2I_SYSTEM = "You are a helpful assistant that generates high-quality images based on user instructions. The instructions are as follows."
BOOGU_DROP_SYSTEM = "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate."
class BooguTokenizer(comfy.text_encoders.qwen3vl.Qwen3VLTokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}):
super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, model_type="qwen3vl_8b")
# apply_chat_template without add_generation_prompt
self.llama_template = "<|im_start|>system\n" + BOOGU_T2I_SYSTEM + "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n"
self.llama_template_images = "<|im_start|>system\n" + BOOGU_DROP_SYSTEM + "<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n"
# Reference SYSTEM_PROMPT_DROP: used for the empty negative/uncond instruction.
self.llama_template_drop = "<|im_start|>system\n" + BOOGU_DROP_SYSTEM + "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n"
def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=True, **kwargs):
if llama_template is None and len(images) == 0 and text.strip() == "":
llama_template = self.llama_template_drop
# Boogu conditions on the no-think template; thinking=True drops the empty <think> block qwen3vl adds by default.
return super().tokenize_with_weights(text, return_word_ids=return_word_ids, llama_template=llama_template, images=images, prevent_empty_text=prevent_empty_text, thinking=thinking, **kwargs)
class BooguQwen3VLClipModel(comfy.text_encoders.qwen3vl.Qwen3VLClipModel):
def __init__(self, device="cpu", dtype=None, attention_mask=True, model_options={}, model_type="qwen3vl_8b"):
super().__init__(device=device, dtype=dtype, attention_mask=attention_mask, model_options=model_options, model_type=model_type)
# apply the final RMSNorm to the tapped last layer
self.layer_norm_hidden_state = True
class BooguTEModel(sd1_clip.SD1ClipModel):
def __init__(self, device="cpu", dtype=None, model_options={}):
clip_model = lambda **kw: BooguQwen3VLClipModel(**kw, model_type="qwen3vl_8b")
super().__init__(device=device, dtype=dtype, name="qwen3vl_8b", clip_model=clip_model, model_options=model_options)
def te(dtype_llama=None, llama_quantization_metadata=None):
class BooguTEModel_(BooguTEModel):
def __init__(self, device="cpu", dtype=None, model_options={}):
if dtype_llama is not None:
dtype = dtype_llama
if llama_quantization_metadata is not None:
model_options = model_options.copy()
model_options["quantization_metadata"] = llama_quantization_metadata
super().__init__(device=device, dtype=dtype, model_options=model_options)
return BooguTEModel_

View File

@ -25,6 +25,11 @@ CLI_FEATURE_FLAG_REGISTRY: dict[str, FeatureFlagInfo] = {
"default": False,
"description": "Show the sign-in button in the frontend even when not signed in",
},
"enable_telemetry": {
"type": "bool",
"default": False,
"description": "Signal the frontend that telemetry collection is enabled",
},
}

View File

@ -149,3 +149,59 @@ class MotionControlRequest(BaseModel):
character_orientation: str = Field(...)
mode: str = Field(..., description="'pro' or 'std'")
model_name: str = Field(...)
class Kling3TurboSettings(BaseModel):
resolution: str = Field("720p", description="'720p' or '1080p'")
aspect_ratio: str | None = Field(None, description="'16:9'/'9:16'/'1:1'; text-to-video only")
duration: int = Field(5, description="3-15 second")
class Kling3TurboText2VideoRequest(BaseModel):
prompt: str = Field(..., description="<=3072 chars; may use multi-shot 'shot n, m, words; ...'")
settings: Kling3TurboSettings | None = Field(None)
class Kling3TurboContent(BaseModel):
type: str = Field(..., description="'prompt' or 'first_frame'")
text: str | None = Field(None, description="for type=prompt; <=2500 chars")
url: str | None = Field(None, description="for type=first_frame")
class Kling3TurboImage2VideoRequest(BaseModel):
contents: list[Kling3TurboContent] = Field(..., description="prompt + first_frame materials")
settings: Kling3TurboSettings | None = Field(None)
class Kling3TurboCreateData(BaseModel):
id: str | None = Field(None, description="Task ID")
status: str | None = Field(None)
message: str | None = Field(None)
class Kling3TurboCreateResponse(BaseModel):
code: int | None = Field(None)
message: str | None = Field(None)
request_id: str | None = Field(None)
data: Kling3TurboCreateData | None = Field(None)
class Kling3TurboOutput(BaseModel):
type: str | None = Field(None, description="'video', 'image', 'audio', ...")
id: str | None = Field(None)
url: str | None = Field(None)
duration: str | None = Field(None)
class Kling3TurboTaskData(BaseModel):
id: str | None = Field(None)
status: str | None = Field(None, description="submitted | processing | succeeded | failed")
message: str | None = Field(None)
outputs: list[Kling3TurboOutput] | None = Field(None)
class Kling3TurboQueryResponse(BaseModel):
code: int | None = Field(None)
message: str | None = Field(None)
request_id: str | None = Field(None)
data: list[Kling3TurboTaskData] | None = Field(None)

View File

@ -60,6 +60,12 @@ from comfy_api_nodes.apis.kling import (
OmniProImageRequest,
OmniProReferences2VideoRequest,
OmniProText2VideoRequest,
Kling3TurboSettings,
Kling3TurboText2VideoRequest,
Kling3TurboContent,
Kling3TurboImage2VideoRequest,
Kling3TurboCreateResponse,
Kling3TurboQueryResponse,
TaskStatusResponse,
TextToVideoWithAudioRequest,
)
@ -2847,6 +2853,67 @@ class MotionControl(IO.ComfyNode):
return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
def build_turbo_shot_prompt(multi_prompt: list[MultiPromptEntry]) -> str:
"""Render storyboard entries into the Turbo multi-shot prompt 'shot n, m, words; ...'."""
return "; ".join(f"shot {i}, {int(e.duration)}, {e.prompt}" for i, e in enumerate(multi_prompt, 1)) + ";"
def _turbo_video_url(response: Kling3TurboQueryResponse) -> str:
"""Extract the result video URL from a /tasks response (data[].outputs[] where type == 'video')."""
task = response.data[0] if response.data else None
if task and task.outputs:
for output in task.outputs:
if output.type == "video" and output.url:
return output.url
raise RuntimeError(f"Kling 3.0 Turbo task finished without a video output: {response.model_dump()}")
async def execute_kling_turbo(
cls: type[IO.ComfyNode],
*,
prompt: str,
resolution: str,
aspect_ratio: str,
duration: int,
start_frame: torch.Tensor | None,
) -> IO.NodeOutput:
"""Create + poll a Kling 3.0 Turbo task. Image-to-video when start_frame is given, else text-to-video."""
if start_frame is not None:
validate_image_dimensions(start_frame, min_width=300, min_height=300)
validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))
contents = [Kling3TurboContent(type="first_frame", url=tensor_to_base64_string(start_frame))]
if prompt:
contents.insert(0, Kling3TurboContent(type="prompt", text=prompt))
create = await sync_op(
cls,
ApiEndpoint(path="/proxy/kling/image-to-video/kling-3.0-turbo", method="POST"),
response_model=Kling3TurboCreateResponse,
data=Kling3TurboImage2VideoRequest(
contents=contents,
settings=Kling3TurboSettings(resolution=resolution, duration=duration), # i2v: no aspect_ratio
),
)
else:
create = await sync_op(
cls,
ApiEndpoint(path="/proxy/kling/text-to-video/kling-3.0-turbo", method="POST"),
response_model=Kling3TurboCreateResponse,
data=Kling3TurboText2VideoRequest(
prompt=prompt,
settings=Kling3TurboSettings(resolution=resolution, aspect_ratio=aspect_ratio, duration=duration),
),
)
if not (create.data and create.data.id):
raise RuntimeError(f"Kling 3.0 Turbo create failed. Code: {create.code}, Message: {create.message}")
final_response = await poll_op(
cls,
ApiEndpoint(path="/proxy/kling/tasks", query_params={"task_ids": create.data.id}),
response_model=Kling3TurboQueryResponse,
status_extractor=lambda r: (r.data[0].status if r.data else None),
)
return IO.NodeOutput(await download_url_to_video_output(_turbo_video_url(final_response)))
class KlingVideoNode(IO.ComfyNode):
@classmethod
@ -2884,7 +2951,11 @@ class KlingVideoNode(IO.ComfyNode):
],
tooltip="Generate a series of video segments with individual prompts and durations.",
),
IO.Boolean.Input("generate_audio", default=True),
IO.Boolean.Input(
"generate_audio",
default=True,
tooltip="'kling-3.0-turbo' always generates native audio, so the audio toggle is ignored.",
),
IO.DynamicCombo.Input(
"model",
options=[
@ -2899,6 +2970,17 @@ class KlingVideoNode(IO.ComfyNode):
),
],
),
IO.DynamicCombo.Option(
"kling-3.0-turbo",
[
IO.Combo.Input("resolution", options=["1080p", "720p"], default="720p"),
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "9:16", "1:1"],
tooltip="Ignored in image-to-video mode.",
),
],
),
],
tooltip="Model and generation settings.",
),
@ -2930,6 +3012,7 @@ class KlingVideoNode(IO.ComfyNode):
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(
widgets=[
"model",
"model.resolution",
"generate_audio",
"multi_shot",
@ -2944,14 +3027,7 @@ class KlingVideoNode(IO.ComfyNode):
),
expr="""
(
$rates := {
"4k": {"off": 0.42, "on": 0.42},
"1080p": {"off": 0.112, "on": 0.168},
"720p": {"off": 0.084, "on": 0.126}
};
$res := $lookup(widgets, "model.resolution");
$audio := widgets.generate_audio ? "on" : "off";
$rate := $lookup($lookup($rates, $res), $audio);
$ms := widgets.multi_shot;
$isSb := $ms != "disabled";
$n := $isSb ? $number($substring($ms, 0, 1)) : 0;
@ -2962,7 +3038,18 @@ class KlingVideoNode(IO.ComfyNode):
$d5 := $n >= 5 ? $lookup(widgets, "multi_shot.storyboard_5_duration") : 0;
$d6 := $n >= 6 ? $lookup(widgets, "multi_shot.storyboard_6_duration") : 0;
$dur := $isSb ? $d1 + $d2 + $d3 + $d4 + $d5 + $d6 : $lookup(widgets, "multi_shot.duration");
{"type":"usd","usd": $rate * $dur}
widgets.model = "kling-3.0-turbo"
? {"type":"usd","usd": ($res = "1080p" ? 0.14 : 0.112) * $dur}
: (
$rates := {
"4k": {"off": 0.42, "on": 0.42},
"1080p": {"off": 0.112, "on": 0.168},
"720p": {"off": 0.084, "on": 0.126}
};
$audio := widgets.generate_audio ? "on" : "off";
$rate := $lookup($lookup($rates, $res), $audio);
{"type":"usd","usd": $rate * $dur}
)
)
""",
),
@ -3015,6 +3102,17 @@ class KlingVideoNode(IO.ComfyNode):
duration = multi_shot["duration"]
validate_string(multi_shot["prompt"], min_length=1, max_length=2500)
if model["model"] == "kling-3.0-turbo":
turbo_prompt = build_turbo_shot_prompt(multi_prompt_list) if custom_multi_shot else multi_shot["prompt"]
return await execute_kling_turbo(
cls,
prompt=turbo_prompt,
resolution=model["resolution"],
aspect_ratio=model["aspect_ratio"],
duration=duration,
start_frame=start_frame,
)
if start_frame is not None:
validate_image_dimensions(start_frame, min_width=300, min_height=300)
validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))

View File

@ -0,0 +1,97 @@
import math
import node_helpers
import comfy.utils
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class TextEncodeBooguEdit(io.ComfyNode):
"""Boogu-Image Edit conditioning.
The edit image is used twice, matching the reference pipeline:
- Qwen3-VL vision tokens (instruction understanding) -> positive only
- VAE reference latent (image identity) -> positive and negative
The ref latent is in both conds so it cancels under CFG (identity preserved);
the vision tokens are only in the positive so CFG amplifies the instruction.
The tokenizer selects the right system prompt automatically (image -> TI2I,
empty negative -> DROP), so no template plumbing is needed here.
"""
@classmethod
def define_schema(cls):
return io.Schema(
node_id="TextEncodeBooguEdit",
category="model/conditioning/boogu",
inputs=[
io.Clip.Input("clip"),
io.String.Input("prompt", multiline=True, dynamic_prompts=True),
io.String.Input("negative_prompt", multiline=True, dynamic_prompts=True, advanced=True),
io.Vae.Input("vae"),
io.Autogrow.Input(
"images",
template=io.Autogrow.TemplateNames(
io.Image.Input("image"),
names=[f"image_{i}" for i in range(1, 17)],
min=0,
),
tooltip="Reference image(s) to edit. Boogu focuses on one reference per sample; more are allowed.",
),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
],
)
@classmethod
def execute(cls, clip, prompt, negative_prompt, vae=None, images: io.Autogrow.Type = None) -> io.NodeOutput:
ref_latents = []
images_vl = []
images = images or {}
for name in sorted(images, key=lambda n: int(n.rsplit("_", 1)[-1])):
image = images[name]
if image is None:
continue
samples = image.movedim(-1, 1)
# Vision tower input: the reference caps the VLM image at 384x384
# (max_vlm_input_pil_pixels in pipeline_boogu.py).
total = int(384 * 384)
scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
width = round(samples.shape[3] * scale_by)
height = round(samples.shape[2] * scale_by)
s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
images_vl.append(s.movedim(1, -1)[:, :, :, :3])
# Reference latent: align to 16 px (VAE /8 * patch_size 2).
if vae is not None:
total = int(1024 * 1024)
scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
width = round(samples.shape[3] * scale_by / 16.0) * 16
height = round(samples.shape[2] * scale_by / 16.0) * 16
s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
ref_latents.append(vae.encode(s.movedim(1, -1)[:, :, :, :3]))
# positive: instruction + vision tokens; negative: empty (no vision). Ref latent on both.
positive = clip.encode_from_tokens_scheduled(clip.tokenize(prompt, images=images_vl))
negative = clip.encode_from_tokens_scheduled(clip.tokenize(negative_prompt))
if len(ref_latents) > 0:
positive = node_helpers.conditioning_set_values(positive, {"reference_latents": ref_latents}, append=True)
negative = node_helpers.conditioning_set_values(negative, {"reference_latents": ref_latents}, append=True)
return io.NodeOutput(positive, negative)
class BooguExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
TextEncodeBooguEdit,
]
async def comfy_entrypoint() -> BooguExtension:
return BooguExtension()

View File

@ -969,7 +969,7 @@ class CLIPLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4"], ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4", "boogu"], ),
},
"optional": {
"device": (["default", "cpu"], {"advanced": True}),
@ -2425,6 +2425,7 @@ async def init_builtin_extra_nodes():
"nodes_tcfg.py",
"nodes_context_windows.py",
"nodes_qwen.py",
"nodes_boogu.py",
"nodes_chroma_radiance.py",
"nodes_pid.py",
"nodes_model_patch.py",

View File

@ -673,46 +673,32 @@ components:
- created_at
- updated_at
type: object
JobsBatchCancelNotFoundResponse:
description: |
Returned with 404 from POST /api/jobs/cancel when one or more
requested job ids are unknown. The batch is fail-fast, so no job
was cancelled.
properties:
error:
description: Human-readable error message
type: string
unknown_ids:
description: The subset of requested job ids that were not found
items:
type: string
type: array
required:
- error
- unknown_ids
type: object
JobsBatchCancelRequest:
JobsCancelRequest:
additionalProperties: false
description: Request body for batch job cancellation
description: Request to cancel multiple jobs by ID.
properties:
job_ids:
description: Ids (UUIDs) of the jobs to cancel
description: Job identifiers (UUIDs) to cancel.
items:
format: uuid
type: string
maxItems: 100
minItems: 1
type: array
required:
- job_ids
type: object
JobsBatchCancelResponse:
description: Response for POST /api/jobs/cancel when all requested jobs were known.
JobsCancelResponse:
description: Response for POST /api/jobs/cancel.
properties:
cancelled:
description: |
True when a cancel event was dispatched for at least one job in
the batch. False when every requested job was already in a
terminal state (the call is still 200 — idempotent).
type: boolean
Job IDs for which a cancel event was successfully dispatched by this
call. Jobs already in a terminal or cancelling state are idempotently
skipped and will not appear here.
items:
type: string
type: array
required:
- cancelled
type: object
@ -1049,7 +1035,7 @@ components:
description: If true, clear all pending jobs from the queue
type: boolean
delete:
description: Array of PENDING job IDs to cancel
description: Array of job IDs to cancel; pending and running jobs transition to cancelled
items:
type: string
type: array
@ -1865,6 +1851,83 @@ paths:
summary: Update asset metadata
tags:
- file
/api/assets/{id}/content:
get:
description: |
Returns the binary content of an asset by ID.
The contract is the same across runtimes — "GET this path and you
receive the asset's bytes" — but the mechanism differs:
- **Local ComfyUI** streams the bytes directly (`200`,
`application/octet-stream`).
- **Cloud** does not proxy large files; it responds `302` with a
`Location` redirect to a short-lived signed storage URL. Clients that
follow redirects (browsers, `fetch`/XHR, `<img>`/`<video>`) receive
the bytes transparently.
Prefer this over the filename-addressed `/api/view` when you have an
asset ID.
operationId: getAssetContent
parameters:
- description: Asset ID
in: path
name: id
required: true
schema:
type: string
- description: |
Content-Disposition for the response: `attachment` (download) or
`inline` (render in browser). Defaults to `attachment`.
in: query
name: disposition
schema:
default: attachment
enum:
- inline
- attachment
type: string
responses:
"200":
content:
application/octet-stream:
schema:
format: binary
type: string
description: Asset content stream (local runtime streams the bytes directly)
"302":
description: Redirect to a signed storage URL (cloud runtime)
headers:
Cache-Control:
description: Private caching directive scoped to the signed URL lifetime
schema:
type: string
Location:
description: Short-lived signed URL to the asset content in storage
schema:
type: string
Vary:
description: Partitions any cached redirect by auth credentials so a private redirect is not reused across users
schema:
type: string
"404":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Asset not found
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Internal server error
security:
- ApiKeyAuth: []
- BearerAuth: []
- CookieAuth: []
summary: Get asset content
tags:
- file
/api/assets/{id}/tags:
delete:
description: Removes one or more tags from an existing asset
@ -2718,14 +2781,20 @@ paths:
summary: Get internationalisation translation strings
/api/interrupt:
post:
deprecated: true
description: |
Cancel all currently RUNNING jobs for the authenticated user.
This will interrupt any job that is currently in 'in_progress' status.
Note: This endpoint only affects running jobs. To cancel pending jobs, use /api/queue.
Deprecated. Prefer the jobs-namespace cancel endpoints:
POST /api/jobs/{job_id}/cancel for a single job, or
POST /api/jobs/cancel to cancel jobs by ID.
Cancels the first active job for the authenticated user (the currently
running job if there is one, otherwise the next pending job). Takes no
body and cannot target a specific job — use the jobs-namespace endpoints
for that.
operationId: interruptJob
responses:
"200":
description: Success - Job interrupted or no running job found
description: Success - first active job cancelled, or no active job found
"401":
content:
application/json:
@ -2738,7 +2807,7 @@ paths:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Internal server error
summary: Interrupt currently running jobs
summary: Interrupt the first active job
tags:
- queue
/api/job/{job_id}/status:
@ -2898,56 +2967,6 @@ paths:
summary: List jobs with pagination and filtering
tags:
- workflow
/api/jobs/cancel:
post:
description: |
Cancel a batch of jobs by id, regardless of each job's state.
Fail-fast: if any provided id is unknown (not running, pending, or
present in history) the request returns 404 and no job is cancelled,
so the call has no partial side effects. When every id is known, all
jobs are cancelled and the call returns 200.
operationId: cancelJobs
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/JobsBatchCancelRequest'
required: true
responses:
"200":
content:
application/json:
schema:
$ref: '#/components/schemas/JobsBatchCancelResponse'
description: Success - All requested jobs were cancelled (or were already terminal)
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Bad Request - body is not valid JSON or job_ids is missing/not a list
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Unauthorized - Authentication required
"404":
content:
application/json:
schema:
$ref: '#/components/schemas/JobsBatchCancelNotFoundResponse'
description: Not Found - one or more job ids are unknown; no job was cancelled
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Internal server error - cancellation failed
summary: Cancel a batch of jobs
tags:
- workflow
/api/jobs/{job_id}:
get:
description: |
@ -3047,6 +3066,64 @@ paths:
summary: Cancel a job
tags:
- workflow
/api/jobs/cancel:
post:
description: |
Cancel one or more jobs for the authenticated user in a single request.
State-agnostic: cancels both pending and running jobs (both transition to
the cancelled state via the same mechanism as the single-job endpoint).
Idempotent per job: a job already in a terminal or cancelling state is a
no-op and simply will not appear in the returned `cancelled` list.
Fail-fast on unknown IDs: if any provided job ID does not exist for this
user, the request returns 404 and no jobs are cancelled. This surfaces
bad IDs to the caller rather than silently dropping them.
This is the canonical batch-cancel endpoint. The delete operation on
POST /api/queue is deprecated in favour of this.
operationId: cancelJobs
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/JobsCancelRequest'
required: true
responses:
"200":
content:
application/json:
schema:
$ref: '#/components/schemas/JobsCancelResponse'
description: Success - cancel requests dispatched (or jobs were already terminal)
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Bad Request - job_ids is missing, empty, exceeds the maximum count, or contains an invalid UUID
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Unauthorized - Authentication required
"404":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: One or more job IDs not found for this user (no jobs cancelled)
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
description: Internal server error - cancellation failed
summary: Cancel multiple jobs
tags:
- workflow
/api/node_replacements:
get:
description: |
@ -3197,9 +3274,18 @@ paths:
tags:
- queue
post:
deprecated: true
description: |
Cancel specific PENDING jobs by ID or clear all pending jobs in the queue.
Note: This endpoint only affects pending jobs. To cancel running jobs, use /api/interrupt.
Deprecated. Prefer the jobs-namespace cancel endpoints:
POST /api/jobs/cancel for cancelling jobs by ID, and
POST /api/jobs/{job_id}/cancel for a single job.
Cancel specific jobs by ID (the `delete` field) or clear all pending
jobs in the queue (the `clear` field). Despite the `delete` naming, this
does not delete anything — listed jobs transition to the cancelled state,
and `delete` cancels both pending and running jobs (not pending-only as
previously documented). Job-by-ID cancellation is superseded by
POST /api/jobs/cancel; `clear` has no jobs-namespace replacement yet.
operationId: manageQueue
requestBody:
content: