diff --git a/comfy/context_windows.py b/comfy/context_windows.py index b54f7f39a..cb44ee6e8 100644 --- a/comfy/context_windows.py +++ b/comfy/context_windows.py @@ -93,6 +93,50 @@ class IndexListCallbacks: return {} +def slice_cond(cond_value, window: IndexListContextWindow, x_in: torch.Tensor, device, temporal_dim: int, temporal_scale: int=1, temporal_offset: int=0, retain_index_list: list[int]=[]): + if not (hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor)): + return None + cond_tensor = cond_value.cond + if temporal_dim >= cond_tensor.ndim: + return None + + cond_size = cond_tensor.size(temporal_dim) + + if temporal_scale == 1: + expected_size = x_in.size(window.dim) - temporal_offset + if cond_size != expected_size: + return None + + if temporal_offset == 0 and temporal_scale == 1: + sliced = window.get_tensor(cond_tensor, device, dim=temporal_dim, retain_index_list=retain_index_list) + return cond_value._copy_with(sliced) + + # skip leading latent positions that have no corresponding conditioning (e.g. 
reference frames) + if temporal_offset > 0: + indices = [i - temporal_offset for i in window.index_list[temporal_offset:]] + indices = [i for i in indices if 0 <= i] + else: + indices = list(window.index_list) + + if not indices: + return None + + if temporal_scale > 1: + scaled = [] + for i in indices: + for k in range(temporal_scale): + si = i * temporal_scale + k + if si < cond_size: + scaled.append(si) + indices = scaled + if not indices: + return None + + idx = tuple([slice(None)] * temporal_dim + [indices]) + sliced = cond_tensor[idx].to(device) + return cond_value._copy_with(sliced) + + @dataclass class ContextSchedule: name: str @@ -177,10 +221,17 @@ class IndexListContextHandler(ContextHandlerABC): new_cond_item[cond_key] = result handled = True break + if not handled and self._model is not None: + result = self._model.resize_cond_for_context_window( + cond_key, cond_value, window, x_in, device, + retain_index_list=self.cond_retain_index_list) + if result is not None: + new_cond_item[cond_key] = result + handled = True if handled: continue if isinstance(cond_value, torch.Tensor): - if (self.dim < cond_value.ndim and cond_value(self.dim) == x_in.size(self.dim)) or \ + if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \ (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)): new_cond_item[cond_key] = window.get_tensor(cond_value, device) # Handle audio_embed (temporal dim is 1) @@ -224,6 +275,7 @@ class IndexListContextHandler(ContextHandlerABC): return context_windows def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]): + self._model = model self.set_step(timestep, model_options) context_windows = self.get_context_windows(model, x_in, model_options) enumerated_context_windows = list(enumerate(context_windows)) diff --git a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py 
def run_up(self, idx, sample_ref, ended, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size):
    """Recursively run ``self.up_blocks[idx:]`` and the output head on one sample.

    Args:
        idx: index of the up block to run; ``idx >= len(self.up_blocks)`` runs the
            output head instead.
        sample_ref: one-element list holding the input tensor. The slot is set to
            ``None`` immediately so this frame owns the only list-held reference.
        ended: when true, ``mark_conv3d_ended`` is applied to the module about to run.
            NOTE(review): presumably flags the final temporal chunk for the causal
            convs; confirm against ``mark_conv3d_ended``.
        timestep_shift_scale: optional ``(shift, scale)`` pair applied after
            ``conv_norm_out``; skipped when ``None``.
        scaled_timestep: passed as ``timestep=`` to ``UNetMidBlock3D`` blocks when
            ``self.timestep_conditioning`` is set.
        checkpoint_fn: wrapper applied to each up block before it is called.
        output_buffer: destination tensor; results are copied into it along dim 2.
        output_offset: one-element list with the next write position along dim 2;
            advanced in place.
        max_chunk_size: byte budget used to decide how many dim-2 chunks to split into.

    Returns:
        None. Output is written into ``output_buffer``.
    """
    sample = sample_ref[0]
    sample_ref[0] = None
    if idx >= len(self.up_blocks):
        # Output head: norm, optional shift/scale, activation, final conv, then unpatchify.
        sample = self.conv_norm_out(sample)
        if timestep_shift_scale is not None:
            shift, scale = timestep_shift_scale
            sample = sample * (1 + scale) + shift
        sample = self.conv_act(sample)
        if ended:
            mark_conv3d_ended(self.conv_out)
        sample = self.conv_out(sample, causal=self.causal)
        # conv_out may yield None or zero frames; in that case nothing is written.
        if sample is not None and sample.shape[2] > 0:
            sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
            t = sample.shape[2]
            output_buffer[:, :, output_offset[0]:output_offset[0] + t].copy_(sample)
            output_offset[0] += t
        return

    up_block = self.up_blocks[idx]
    if ended:
        mark_conv3d_ended(up_block)
    if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
        sample = checkpoint_fn(up_block)(
            sample, causal=self.causal, timestep=scaled_timestep
        )
    else:
        sample = checkpoint_fn(up_block)(sample, causal=self.causal)

    # A block may produce no output for this call; stop this branch of the recursion.
    if sample is None or sample.shape[2] == 0:
        return

    # Ceiling division: number of max_chunk_size-byte pieces needed to hold the sample.
    total_bytes = sample.numel() * sample.element_size()
    num_chunks = (total_bytes + max_chunk_size - 1) // max_chunk_size

    if num_chunks == 1:
        # when we are not chunking, detach our x so the callee can free it as soon as they are done
        next_sample_ref = [sample]
        del sample
        self.run_up(idx + 1, next_sample_ref, ended, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
        return
    else:
        samples = torch.chunk(sample, chunks=num_chunks, dim=2)

    # Only the last chunk of an "ended" sample is itself marked as ended.
    for chunk_idx, sample1 in enumerate(samples):
        self.run_up(idx + 1, [sample1], ended and chunk_idx == len(samples) - 1, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
def run_up(self, layer_idx, x_ref, feat_cache, feat_idx, out_chunks):
    """Recursively apply ``self.upsamples[layer_idx:]`` and ``self.head`` to one tensor.

    ``x_ref`` is a one-element list; its slot is cleared on entry so the tensor is
    only referenced from this frame. Finished outputs are appended to ``out_chunks``.
    ``feat_cache`` / ``feat_idx`` are the cache list and its one-element cursor; when
    ``feat_cache`` is ``None`` layers are called without cache arguments.
    """
    hidden = x_ref[0]
    x_ref[0] = None
    caching = feat_cache is not None

    # Past the last upsample layer: run the head and emit the result.
    if layer_idx >= len(self.upsamples):
        for head_layer in self.head:
            if caching and isinstance(head_layer, CausalConv3d):
                slot = feat_idx[0]
                tail = hidden[:, :, -CACHE_T:, :, :]
                hidden = head_layer(hidden, feat_cache[slot])
                feat_cache[slot] = tail
                feat_idx[0] = slot + 1
            else:
                hidden = head_layer(hidden)
        out_chunks.append(hidden)
        return

    stage = self.upsamples[layer_idx]
    split_per_frame = isinstance(stage, Resample) and stage.mode == 'upsample3d' and hidden.shape[2] > 1
    if split_per_frame:
        # Re-enter the same layer once per frame along dim 2; each frame gets its own
        # copy of the cursor so every frame starts from the same cache position.
        for t in range(hidden.shape[2]):
            self.run_up(layer_idx, [hidden[:, :, t:t + 1, :, :]], feat_cache, feat_idx.copy(), out_chunks)
        del hidden
        return

    hidden = stage(hidden, feat_cache, feat_idx) if caching else stage(hidden)

    # Hand the tensor over via a fresh holder and drop our own name for it first.
    handoff = [hidden]
    del hidden
    self.run_up(layer_idx + 1, handoff, feat_cache, feat_idx, out_chunks)
def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=None):
    """Override in subclasses to handle model-specific cond slicing for context windows.

    Args:
        cond_key: name of the conditioning entry being processed.
        cond_value: the conditioning object stored under ``cond_key``.
        window: the context window to slice for.
        x_in: the latent tensor being windowed.
        device: target device for the sliced conditioning.
        retain_index_list: optional list of indices to keep for every window;
            ``None`` (the default) means no retained indices.

    Returns:
        A sliced cond object, or None to fall through to default handling.
        This base implementation always returns None.

    Use comfy.context_windows.slice_cond() for common cases.
    """
    return None
def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
    """Slice ``audio_embed`` along dim 1 for a context window; defer everything else.

    Any other ``cond_key`` is delegated to the parent implementation. Note that
    ``retain_index_list`` is only forwarded on the delegated path, not to the
    audio slice.
    """
    if cond_key != "audio_embed":
        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
    return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=1)
# Request/response schemas for the Quiver SVG endpoints used by the Quiver API nodes.
from pydantic import BaseModel, Field


# Reference to an image by URL.
class QuiverImageObject(BaseModel):
    url: str = Field(...)


# Request body for text-to-SVG generation.
class QuiverTextToSVGRequest(BaseModel):
    model: str = Field(default="arrow-preview")
    prompt: str = Field(...)
    instructions: str | None = Field(default=None)
    # At most 4 reference images (enforced by max_length).
    references: list[QuiverImageObject] | None = Field(default=None, max_length=4)
    # Optional sampling controls; None leaves the field unset on this side.
    temperature: float | None = Field(default=None, ge=0, le=2)
    top_p: float | None = Field(default=None, ge=0, le=1)
    presence_penalty: float | None = Field(default=None, ge=-2, le=2)


# Request body for image-to-SVG vectorization.
class QuiverImageToSVGRequest(BaseModel):
    model: str = Field(default="arrow-preview")
    image: QuiverImageObject = Field(...)
    auto_crop: bool | None = Field(default=None)
    # Allowed range is 128..4096 inclusive.
    target_size: int | None = Field(default=None, ge=128, le=4096)
    temperature: float | None = Field(default=None, ge=0, le=2)
    top_p: float | None = Field(default=None, ge=0, le=1)
    presence_penalty: float | None = Field(default=None, ge=-2, le=2)


# One generated SVG document, carried as text.
class QuiverSVGResponseItem(BaseModel):
    svg: str = Field(...)
    mime_type: str | None = Field(default="image/svg+xml")


# Token accounting reported with a response; every field is optional.
class QuiverSVGUsage(BaseModel):
    total_tokens: int | None = Field(default=None)
    input_tokens: int | None = Field(default=None)
    output_tokens: int | None = Field(default=None)


# Top-level response: `data` is required, the remaining fields are optional.
class QuiverSVGResponse(BaseModel):
    id: str | None = Field(default=None)
    created: int | None = Field(default=None)
    data: list[QuiverSVGResponseItem] = Field(...)
    usage: QuiverSVGUsage | None = Field(default=None)
# API node: generate an SVG from a text prompt (plus optional reference images)
# through the Quiver proxy endpoint.
class QuiverTextToSVGNode(IO.ComfyNode):
    @classmethod
    def define_schema(cls):
        """Declare the node's inputs, outputs, hidden auth fields and price badge."""
        return IO.Schema(
            node_id="QuiverTextToSVGNode",
            display_name="Quiver Text to SVG",
            category="api node/image/Quiver",
            description="Generate an SVG from a text prompt using Quiver AI.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text description of the desired SVG output.",
                ),
                IO.String.Input(
                    "instructions",
                    multiline=True,
                    default="",
                    tooltip="Additional style or formatting guidance.",
                    optional=True,
                ),
                IO.Autogrow.Input(
                    "reference_images",
                    template=IO.Autogrow.TemplatePrefix(
                        IO.Image.Input("image"),
                        prefix="ref_",
                        min=0,
                        max=4,
                    ),
                    tooltip="Up to 4 reference images to guide the generation.",
                    optional=True,
                ),
                IO.DynamicCombo.Input(
                    "model",
                    options=[
                        IO.DynamicCombo.Option(
                            "arrow-preview",
                            [
                                IO.Float.Input(
                                    "temperature",
                                    default=1.0,
                                    min=0.0,
                                    max=2.0,
                                    step=0.1,
                                    display_mode=IO.NumberDisplay.slider,
                                    tooltip="Randomness control. Higher values increase randomness.",
                                    advanced=True,
                                ),
                                IO.Float.Input(
                                    "top_p",
                                    default=1.0,
                                    min=0.05,
                                    max=1.0,
                                    step=0.05,
                                    display_mode=IO.NumberDisplay.slider,
                                    tooltip="Nucleus sampling parameter.",
                                    advanced=True,
                                ),
                                IO.Float.Input(
                                    "presence_penalty",
                                    default=0.0,
                                    min=-2.0,
                                    max=2.0,
                                    step=0.1,
                                    display_mode=IO.NumberDisplay.slider,
                                    tooltip="Token presence penalty.",
                                    advanced=True,
                                ),
                            ],
                        ),
                    ],
                    tooltip="Model to use for SVG generation.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    control_after_generate=True,
                    tooltip="Seed to determine if node should re-run; "
                    "actual results are nondeterministic regardless of seed.",
                ),
            ],
            outputs=[
                IO.SVG.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"usd","usd":0.429}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: dict,
        seed: int,
        instructions: str | None = None,
        reference_images: IO.Autogrow.Type = None,
    ) -> IO.NodeOutput:
        """Upload any reference images, request an SVG generation and wrap the result.

        Args:
            prompt: required text prompt; must contain at least one character.
            model: dict with the selected ``"model"`` name plus the optional
                ``temperature`` / ``top_p`` / ``presence_penalty`` values.
            seed: not sent to the API; only declared so the node re-runs when it changes.
            instructions: optional guidance; blank or whitespace-only is sent as ``None``.
            reference_images: optional mapping of reference images (at most 4).

        Raises:
            ValueError: if more than 4 reference images are supplied.
        """
        validate_string(prompt, strip_whitespace=False, min_length=1)

        references = None
        if reference_images:
            image_keys = list(reference_images)
            # Validate the count before uploading anything, so an oversized
            # request fails without performing (and paying for) any uploads.
            if len(image_keys) > 4:
                raise ValueError("Maximum 4 reference images are allowed.")
            references = []
            for key in image_keys:
                url = await upload_image_to_comfyapi(cls, reference_images[key])
                references.append(QuiverImageObject(url=url))

        # Empty or whitespace-only instructions are omitted from the request.
        instructions_val = (instructions.strip() or None) if instructions else None

        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/quiver/v1/svgs/generations", method="POST"),
            response_model=QuiverSVGResponse,
            data=QuiverTextToSVGRequest(
                model=model["model"],
                prompt=prompt,
                instructions=instructions_val,
                references=references,
                temperature=model.get("temperature"),
                top_p=model.get("top_p"),
                presence_penalty=model.get("presence_penalty"),
            ),
        )

        svg_data = [BytesIO(item.svg.encode("utf-8")) for item in response.data]
        return IO.NodeOutput(SVG(svg_data))
@classmethod
async def execute(
    cls,
    image,
    auto_crop: bool,
    model: dict,
    seed: int,
) -> IO.NodeOutput:
    """Upload the input image, request its vectorization and wrap the SVG result.

    ``model`` carries the selected model name under ``"model"`` plus the optional
    ``target_size`` / ``temperature`` / ``top_p`` / ``presence_penalty`` values.
    ``auto_crop`` is only sent when truthy. ``seed`` is not sent to the API.
    """
    uploaded = QuiverImageObject(url=await upload_image_to_comfyapi(cls, image))

    endpoint = ApiEndpoint(path="/proxy/quiver/v1/svgs/vectorizations", method="POST")
    request = QuiverImageToSVGRequest(
        model=model["model"],
        image=uploaded,
        auto_crop=auto_crop or None,
        target_size=model.get("target_size"),
        temperature=model.get("temperature"),
        top_p=model.get("top_p"),
        presence_penalty=model.get("presence_penalty"),
    )
    response = await sync_op(cls, endpoint, response_model=QuiverSVGResponse, data=request)

    # Each returned SVG document is handed on as a UTF-8 byte stream.
    streams = []
    for item in response.data:
        streams.append(BytesIO(item.svg.encode("utf-8")))
    return IO.NodeOutput(SVG(streams))
@classmethod
def execute(cls, image, low_threshold, high_threshold) -> io.NodeOutput:
    """Run ``canny`` on the image in float32 and return a 3-channel result.

    The input is moved to the torch device with its last axis moved to position 1,
    and element 1 of ``canny``'s result is used. That element is cast to the
    intermediate device/dtype, repeated 3 times along dim 1, and moved back to
    channels-last.
    """
    compute_device = comfy.model_management.get_torch_device()
    channels_first = image.to(device=compute_device, dtype=torch.float32).movedim(-1, 1)
    output = canny(channels_first, low_threshold, high_threshold)

    # NOTE(review): output[1] is presumably the thresholded edge map; confirm against canny().
    edges = output[1].to(
        device=comfy.model_management.intermediate_device(),
        dtype=comfy.model_management.intermediate_dtype(),
    )
    return io.NodeOutput(edges.repeat(1, 3, 1, 1).movedim(1, -1))
def generate(self, width, height, batch_size=1, color=0):
    """Create a solid-color image batch of shape [batch_size, height, width, 3].

    ``color`` is read as a packed 0xRRGGBB integer; each 8-bit channel is scaled
    by 1/255. The tensor is created on the intermediate device with the
    intermediate dtype. Returns a one-element tuple holding the tensor.
    """
    dtype = comfy.model_management.intermediate_dtype()
    device = comfy.model_management.intermediate_device()
    plane_shape = [batch_size, height, width, 1]
    # Bit shifts select red, green and blue, in that order.
    planes = [
        torch.full(plane_shape, ((color >> shift) & 0xFF) / 0xFF, device=device, dtype=dtype)
        for shift in (16, 8, 0)
    ]
    return (torch.cat(planes, dim=-1), )
e2ca79be7..6db9b1267 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.17.0" +version = "0.18.0" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10"