Merge b93e924e01 into e35348aa53

2026-05-13 10:42:59 +08:00 · 2026-05-07 11:26:21 +08:00
14 changed files with 68 additions and 404 deletions
--- a/comfy/bg_removal_model.py
+++ b/comfy/bg_removal_model.py
@ -1,78 +0,0 @@
-from .utils import load_torch_file
-import os
-import json
-import torch
-import logging
-
-import comfy.ops
-import comfy.model_patcher
-import comfy.model_management
-import comfy.clip_model
-import comfy.background_removal.birefnet
-
-BG_REMOVAL_MODELS = {
-    "birefnet": comfy.background_removal.birefnet.BiRefNet
-}
-
-class BackgroundRemovalModel():
-    def __init__(self, json_config):
-        with open(json_config) as f:
-            config = json.load(f)
-
-        self.image_size = config.get("image_size", 1024)
-        self.image_mean = config.get("image_mean", [0.0, 0.0, 0.0])
-        self.image_std = config.get("image_std", [1.0, 1.0, 1.0])
-        self.model_type = config.get("model_type", "birefnet")
-        self.config = config.copy()
-        model_class = BG_REMOVAL_MODELS.get(self.model_type)
-
-        self.load_device = comfy.model_management.text_encoder_device()
-        offload_device = comfy.model_management.text_encoder_offload_device()
-        self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
-        self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
-        self.model.eval()
-
-        self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
-
-    def load_sd(self, sd):
-        return self.model.load_state_dict(sd, strict=False, assign=self.patcher.is_dynamic())
-
-    def get_sd(self):
-        return self.model.state_dict()
-
-    def encode_image(self, image):
-        comfy.model_management.load_model_gpu(self.patcher)
-        H, W = image.shape[1], image.shape[2]
-        pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=False)
-        out = self.model(pixel_values=pixel_values)
-        out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False)
-
-        mask = out.sigmoid()
-        if mask.ndim == 3:
-            mask = mask.unsqueeze(0)
-        if mask.shape[1] != 1:
-            mask = mask.movedim(-1, 1)
-
-        return mask
-
-
-def load_background_removal_model(sd):
-    if "bb.layers.1.blocks.0.attn.relative_position_index" in sd:
-        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "background_removal"), "birefnet.json")
-    else:
-        return None
-
-    bg_model = BackgroundRemovalModel(json_config)
-    m, u = bg_model.load_sd(sd)
-    if len(m) > 0:
-        logging.warning("missing background removal: {}".format(m))
-    u = set(u)
-    keys = list(sd.keys())
-    for k in keys:
-        if k not in u:
-            sd.pop(k)
-    return bg_model
-
-def load(ckpt_path):
-    sd = load_torch_file(ckpt_path)
-    return load_background_removal_model(sd)
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@ -1,6 +1,7 @@
 from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace
 import os
 import json
+import torch
 import logging

 import comfy.ops
@ -9,6 +10,7 @@ import comfy.model_management
 import comfy.utils
 import comfy.clip_model
 import comfy.image_encoders.dino2
+import comfy.image_encoders.birefnet

 class Output:
    def __getitem__(self, key):
@ -23,6 +25,7 @@ IMAGE_ENCODERS = {
    "siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
    "siglip2_vision_model": comfy.clip_model.CLIPVisionModelProjection,
    "dinov2": comfy.image_encoders.dino2.Dinov2Model,
+    "birefnet": comfy.image_encoders.birefnet.BiRefNet
 }

 class ClipVisionModel():
@ -34,6 +37,7 @@ class ClipVisionModel():
        self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073])
        self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711])
        self.model_type = config.get("model_type", "clip_vision_model")
+        self.resize_to_original = config.get("resize_to_original", False)
        self.config = config.copy()
        model_class = IMAGE_ENCODERS.get(self.model_type)
        if self.model_type == "siglip_vision_model":
@ -57,11 +61,15 @@ class ClipVisionModel():

    def encode_image(self, image, crop=True):
        comfy.model_management.load_model_gpu(self.patcher)
+        H, W = image.shape[1], image.shape[2]
        if self.model_type == "siglip2_vision_model":
            pixel_values = comfy.clip_model.siglip2_preprocess(image.to(self.load_device), size=self.image_size, patch_size=self.config.get("patch_size", 16), num_patches=self.config.get("num_patches", 256), mean=self.image_mean, std=self.image_std, crop=crop).float()
        else:
            pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float()
        out = self.model(pixel_values=pixel_values, intermediate_output='all' if self.return_all_hidden_states else -2)
+        if self.resize_to_original:
+            resized = torch.nn.functional.interpolate(out[0], size=(H, W), mode="bicubic", antialias=False)
+            out = (resized,) + out[1:]

        outputs = Output()
        outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device())
@ -129,6 +137,9 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
        else:
            json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")

+    elif "bb.layers.1.blocks.0.attn.relative_position_index" in sd:
+        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "birefnet.json")
+
    # Dinov2
    elif 'encoder.layer.39.layer_scale2.lambda1' in sd:
        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json")
--- a/comfy/background_removal/birefnet.json
+++ b/comfy/background_removal/birefnet.json
--- a/comfy/background_removal/birefnet.py
+++ b/comfy/background_removal/birefnet.py
@ -674,7 +674,8 @@ class Decoder(nn.Module):
        patches_batch = self.get_patches_batch(x, _p1) if self.split else x
        _p1 = torch.cat((_p1, self.ipt_blk1(F.interpolate(patches_batch, size=x.shape[2:], mode='bilinear', align_corners=True))), 1)
        p1_out = self.conv_out1(_p1)
-        return p1_out
+        fake = torch.empty_like(p1_out)
+        return p1_out, fake, fake, fake


 class SimpleConvs(nn.Module):
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@ -17,7 +17,6 @@ if TYPE_CHECKING:
    from spandrel import ImageModelDescriptor
    from comfy.clip_vision import ClipVisionModel
    from comfy.clip_vision import Output as ClipVisionOutput_
-    from comfy.bg_removal_model import BackgroundRemovalModel
    from comfy.controlnet import ControlNet
    from comfy.hooks import HookGroup, HookKeyframeGroup
    from comfy.model_patcher import ModelPatcher
@ -615,11 +614,6 @@ class Model(ComfyTypeIO):
    if TYPE_CHECKING:
        Type = ModelPatcher

-@comfytype(io_type="BACKGROUND_REMOVAL")
-class BackgroundRemoval(ComfyTypeIO):
-    if TYPE_CHECKING:
-        Type = BackgroundRemovalModel
-
@comfytype(io_type="CLIP_VISION")
 class ClipVision(ComfyTypeIO):
    if TYPE_CHECKING:
@ -2263,7 +2257,6 @@ __all__ = [
    "ModelPatch",
    "ClipVision",
    "ClipVisionOutput",
-    "BackgroundRemoval",
    "AudioEncoder",
    "AudioEncoderOutput",
    "StyleModel",
--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@ -83,16 +83,13 @@ class GeminiImageModel(str, Enum):

 async def create_image_parts(
    cls: type[IO.ComfyNode],
-    images: Input.Image | list[Input.Image],
+    images: Input.Image,
    image_limit: int = 0,
 ) -> list[GeminiPart]:
    image_parts: list[GeminiPart] = []
    if image_limit < 0:
        raise ValueError("image_limit must be greater than or equal to 0 when creating Gemini image parts.")
-
-    # Accept either a single (possibly-batched) tensor or a list of them; share URL budget across all.
-    images_list: list[Input.Image] = images if isinstance(images, list) else [images]
-    total_images = sum(get_number_of_images(img) for img in images_list)
+    total_images = get_number_of_images(images)
    if total_images <= 0:
        raise ValueError("No images provided to create_image_parts; at least one image is required.")

@ -101,18 +98,10 @@ async def create_image_parts(

    # Number of images we'll send as URLs (fileData)
    num_url_images = min(effective_max, 10)  # Vertex API max number of image links
-    upload_kwargs: dict = {"wait_label": "Uploading reference images"}
-    if effective_max > num_url_images:
-        # Split path (e.g. 11+ images): suppress per-image counter to avoid a confusing dual-fraction label.
-        upload_kwargs = {
-            "wait_label": f"Uploading reference images ({num_url_images}+)",
-            "show_batch_index": False,
-        }
    reference_images_urls = await upload_images_to_comfyapi(
        cls,
-        images_list,
+        images,
        max_images=num_url_images,
-        **upload_kwargs,
    )
    for reference_image_url in reference_images_urls:
        image_parts.append(
@ -123,22 +112,15 @@ async def create_image_parts(
                )
            )
        )
-    if effective_max > num_url_images:
-        flat: list[torch.Tensor] = []
-        for tensor in images_list:
-            if len(tensor.shape) == 4:
-                flat.extend(tensor[i] for i in range(tensor.shape[0]))
-            else:
-                flat.append(tensor)
-        for idx in range(num_url_images, effective_max):
-            image_parts.append(
-                GeminiPart(
-                    inlineData=GeminiInlineData(
-                        mimeType=GeminiMimeType.image_png,
-                        data=tensor_to_base64_string(flat[idx]),
-                    )
+    for idx in range(num_url_images, effective_max):
+        image_parts.append(
+            GeminiPart(
+                inlineData=GeminiInlineData(
+                    mimeType=GeminiMimeType.image_png,
+                    data=tensor_to_base64_string(images[idx]),
                )
            )
+        )
    return image_parts


@ -909,6 +891,10 @@ class GeminiNanoBanana2(IO.ComfyNode):
                        "9:16",
                        "16:9",
                        "21:9",
+                        # "1:4",
+                        # "4:1",
+                        # "8:1",
+                        # "1:8",
                    ],
                    default="auto",
                    tooltip="If set to 'auto', matches your input image's aspect ratio; "
@ -916,7 +902,12 @@ class GeminiNanoBanana2(IO.ComfyNode):
                ),
                IO.Combo.Input(
                    "resolution",
-                    options=["1K", "2K", "4K"],
+                    options=[
+                        # "512px",
+                        "1K",
+                        "2K",
+                        "4K",
+                    ],
                    tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.",
                ),
                IO.Combo.Input(
@ -965,7 +956,6 @@ class GeminiNanoBanana2(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
-            is_deprecated=True,
        )

    @classmethod
@ -1026,197 +1016,6 @@ class GeminiNanoBanana2(IO.ComfyNode):
        )


-def _nano_banana_2_v2_model_inputs():
-    return [
-        IO.Combo.Input(
-            "aspect_ratio",
-            options=[
-                "auto",
-                "1:1",
-                "2:3",
-                "3:2",
-                "3:4",
-                "4:3",
-                "4:5",
-                "5:4",
-                "9:16",
-                "16:9",
-                "21:9",
-                "1:4",
-                "4:1",
-                "8:1",
-                "1:8",
-            ],
-            default="auto",
-            tooltip="If set to 'auto', matches your input image's aspect ratio; "
-            "if no image is provided, a 16:9 square is usually generated.",
-        ),
-        IO.Combo.Input(
-            "resolution",
-            options=["1K", "2K", "4K"],
-            tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.",
-        ),
-        IO.Combo.Input(
-            "thinking_level",
-            options=["MINIMAL", "HIGH"],
-        ),
-        IO.Autogrow.Input(
-            "images",
-            template=IO.Autogrow.TemplateNames(
-                IO.Image.Input("image"),
-                names=[f"image_{i}" for i in range(1, 15)],
-                min=0,
-            ),
-            tooltip="Optional reference image(s). Up to 14 images total.",
-        ),
-        IO.Custom("GEMINI_INPUT_FILES").Input(
-            "files",
-            optional=True,
-            tooltip="Optional file(s) to use as context for the model. "
-                    "Accepts inputs from the Gemini Generate Content Input Files node.",
-        ),
-    ]
-
-
-class GeminiNanoBanana2V2(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="GeminiNanoBanana2V2",
-            display_name="Nano Banana 2",
-            category="api node/image/Gemini",
-            description="Generate or edit images synchronously via Google Vertex API.",
-            inputs=[
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    tooltip="Text prompt describing the image to generate or the edits to apply. "
-                    "Include any constraints, styles, or details the model should follow.",
-                    default="",
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "Nano Banana 2 (Gemini 3.1 Flash Image)",
-                            _nano_banana_2_v2_model_inputs(),
-                        ),
-                    ],
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=42,
-                    min=0,
-                    max=0xFFFFFFFFFFFFFFFF,
-                    control_after_generate=True,
-                    tooltip="When the seed is fixed to a specific value, the model makes a best effort to provide "
-                    "the same response for repeated requests. Deterministic output isn't guaranteed. "
-                    "Also, changing the model or parameter settings, such as the temperature, "
-                    "can cause variations in the response even when you use the same seed value. "
-                    "By default, a random seed value is used.",
-                ),
-                IO.Combo.Input(
-                    "response_modalities",
-                    options=["IMAGE", "IMAGE+TEXT"],
-                    advanced=True,
-                ),
-                IO.String.Input(
-                    "system_prompt",
-                    multiline=True,
-                    default=GEMINI_IMAGE_SYS_PROMPT,
-                    optional=True,
-                    tooltip="Foundational instructions that dictate an AI's behavior.",
-                    advanced=True,
-                ),
-            ],
-            outputs=[
-                IO.Image.Output(),
-                IO.String.Output(),
-                IO.Image.Output(
-                    display_name="thought_image",
-                    tooltip="First image from the model's thinking process. "
-                    "Only available with thinking_level HIGH and IMAGE+TEXT modality.",
-                ),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution"]),
-                expr="""
-                (
-                  $r := $lookup(widgets, "model.resolution");
-                  $prices := {"1k": 0.0696, "2k": 0.1014, "4k": 0.154};
-                  {"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}}
-                )
-                """,
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        prompt: str,
-        model: dict,
-        seed: int,
-        response_modalities: str,
-        system_prompt: str = "",
-    ) -> IO.NodeOutput:
-        validate_string(prompt, strip_whitespace=True, min_length=1)
-        model_choice = model["model"]
-        if model_choice == "Nano Banana 2 (Gemini 3.1 Flash Image)":
-            model_id = "gemini-3.1-flash-image-preview"
-        else:
-            model_id = model_choice
-
-        images = model.get("images") or {}
-        parts: list[GeminiPart] = [GeminiPart(text=prompt)]
-        if images:
-            image_tensors: list[Input.Image] = [t for t in images.values() if t is not None]
-            if image_tensors:
-                if sum(get_number_of_images(t) for t in image_tensors) > 14:
-                    raise ValueError("The current maximum number of supported images is 14.")
-                parts.extend(await create_image_parts(cls, image_tensors))
-        files = model.get("files")
-        if files is not None:
-            parts.extend(files)
-
-        image_config = GeminiImageConfig(imageSize=model["resolution"])
-        if model["aspect_ratio"] != "auto":
-            image_config.aspectRatio = model["aspect_ratio"]
-
-        gemini_system_prompt = None
-        if system_prompt:
-            gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
-
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/vertexai/gemini/{model_id}", method="POST"),
-            data=GeminiImageGenerateContentRequest(
-                contents=[
-                    GeminiContent(role=GeminiRole.user, parts=parts),
-                ],
-                generationConfig=GeminiImageGenerationConfig(
-                    responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
-                    imageConfig=image_config,
-                    thinkingConfig=GeminiThinkingConfig(thinkingLevel=model["thinking_level"]),
-                ),
-                systemInstruction=gemini_system_prompt,
-            ),
-            response_model=GeminiGenerateContentResponse,
-            price_extractor=calculate_tokens_price,
-        )
-        return IO.NodeOutput(
-            await get_image_from_response(response),
-            get_text_from_response(response),
-            await get_image_from_response(response, thought=True),
-        )
-
-
 class GeminiExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -1225,7 +1024,6 @@ class GeminiExtension(ComfyExtension):
            GeminiImage,
            GeminiImage2,
            GeminiNanoBanana2,
-            GeminiNanoBanana2V2,
            GeminiInputFiles,
        ]

--- a/comfy_api_nodes/nodes_grok.py
+++ b/comfy_api_nodes/nodes_grok.py
@ -54,12 +54,7 @@ class GrokImageNode(IO.ComfyNode):
            inputs=[
                IO.Combo.Input(
                    "model",
-                    options=[
-                        "grok-imagine-image-quality",
-                        "grok-imagine-image-pro",
-                        "grok-imagine-image",
-                        "grok-imagine-image-beta",
-                    ],
+                    options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
                ),
                IO.String.Input(
                    "prompt",
@ -116,12 +111,10 @@ class GrokImageNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images", "resolution"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]),
                expr="""
                (
-                  $rate := widgets.model = "grok-imagine-image-quality"
-                    ? (widgets.resolution = "1k" ? 0.05 : 0.07)
-                    : ($contains(widgets.model, "pro") ? 0.07 : 0.02);
+                  $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
                  {"type":"usd","usd": $rate * widgets.number_of_images}
                )
                """,
@ -174,12 +167,7 @@ class GrokImageEditNode(IO.ComfyNode):
            inputs=[
                IO.Combo.Input(
                    "model",
-                    options=[
-                        "grok-imagine-image-quality",
-                        "grok-imagine-image-pro",
-                        "grok-imagine-image",
-                        "grok-imagine-image-beta",
-                    ],
+                    options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
                ),
                IO.Image.Input("image", display_name="images"),
                IO.String.Input(
@ -240,19 +228,11 @@ class GrokImageEditNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images", "resolution"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]),
                expr="""
                (
-                  $isQualityModel := widgets.model = "grok-imagine-image-quality";
-                  $isPro := $contains(widgets.model, "pro");
-                  $rate := $isQualityModel
-                    ? (widgets.resolution = "1k" ? 0.05 : 0.07)
-                    : ($isPro ? 0.07 : 0.02);
-                  $base := $isQualityModel ? 0.01 : 0.002;
-                  $output := $rate * widgets.number_of_images;
-                  $isPro
-                    ? {"type":"usd","usd": $base + $output}
-                    : {"type":"range_usd","min_usd": $base + $output, "max_usd": 3 * $base + $output}
+                  $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
+                  {"type":"usd","usd": 0.002 + $rate * widgets.number_of_images}
                )
                """,
            ),
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@ -2787,15 +2787,11 @@ class MotionControl(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["mode", "model"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["mode"]),
                expr="""
                (
-                  $prices := {
-                    "kling-v3": {"std": 0.126, "pro": 0.168},
-                    "kling-v2-6": {"std": 0.07, "pro": 0.112}
-                  };
-                  $modelPrices := $lookup($prices, widgets.model);
-                  {"type":"usd","usd": $lookup($modelPrices, widgets.mode), "format":{"suffix":"/second"}}
+                  $prices := {"std": 0.07, "pro": 0.112};
+                  {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}}
                )
                """,
            ),
--- a/comfy_extras/nodes_bg_removal.py
+++ b/comfy_extras/nodes_bg_removal.py
@ -1,58 +0,0 @@
-import folder_paths
-from typing_extensions import override
-from comfy_api.latest import ComfyExtension, IO
-from comfy.bg_removal_model import load
-
-
-class LoadBackGroundRemovalModel(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        files = folder_paths.get_filename_list("background_removal")
-        return IO.Schema(
-            node_id="LoadBackGroundRemovalModel",
-            category="loaders",
-            inputs=[
-                IO.Combo.Input("background_removal_name", options=sorted(files)),
-            ],
-            outputs=[
-                IO.BackgroundRemoval.Output("bg_model")
-            ]
-        )
-    @classmethod
-    def execute(cls, background_removal_name):
-        path = folder_paths.get_full_path_or_raise("background_removal", background_removal_name)
-        bg = load(path)
-        if bg is None:
-            raise RuntimeError("ERROR: clip vision file is invalid and does not contain a valid vision model.")
-        return IO.NodeOutput(bg)
-
-class RemoveBackGround(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="RemoveBackGround",
-            category="encode",
-            inputs=[
-                IO.Image.Input("image"),
-                IO.BackgroundRemoval.Input("bg_removal_model")
-            ],
-            outputs=[
-                IO.Mask.Output("mask")
-            ]
-        )
-    @classmethod
-    def execute(cls, image, bg_removal_model):
-        mask = bg_removal_model.encode_image(image)
-        return IO.NodeOutput(mask)
-
-class BackGroundRemovalExtension(ComfyExtension):
-    @override
-    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
-        return [
-            LoadBackGroundRemovalModel,
-            RemoveBackGround
-        ]
-
-
-async def comfy_entrypoint() -> BackGroundRemovalExtension:
-    return BackGroundRemovalExtension()
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@ -397,6 +397,29 @@ class GrowMask(IO.ComfyNode):

    expand_mask = execute  # TODO: remove

+class ClipVisionToMask(IO.ComfyNode):
+    """Convert a CLIP_VISION_OUTPUT's last hidden state into a mask by applying a sigmoid."""
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="ClipVisionToMask",
+            inputs=[IO.ClipVisionOutput.Input("clip_vision_output")],
+            outputs=[IO.Mask.Output("mask")],
+        )
+    @classmethod
+    def execute(cls, clip_vision_output):
+        # A raw tensor is used as-is; previously that path left `mask` unbound (UnboundLocalError).
+        is_tensor = isinstance(clip_vision_output, torch.Tensor)
+        mask = clip_vision_output if is_tensor else clip_vision_output["last_hidden_state"]
+        mask = mask.sigmoid()  # map raw scores into the (0, 1) range
+        if mask.ndim == 3:
+            mask = mask.unsqueeze(0)  # add a leading dimension so the result is 4-D
+        if mask.shape[1] != 1:
+            mask = mask.movedim(-1, 1)  # move the last axis to position 1 when dim 1 is not size 1
+        return IO.NodeOutput(mask)
+
+    clip_vision_to_mask = execute
+
 class ThresholdMask(IO.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -460,6 +483,7 @@ class MaskExtension(ComfyExtension):
            GrowMask,
            ThresholdMask,
            MaskPreview,
+            ClipVisionToMask
        ]


--- a/folder_paths.py
+++ b/folder_paths.py
@ -52,8 +52,6 @@ folder_names_and_paths["model_patches"] = ([os.path.join(models_dir, "model_patc

 folder_names_and_paths["audio_encoders"] = ([os.path.join(models_dir, "audio_encoders")], supported_pt_extensions)

-folder_names_and_paths["background_removal"] = ([os.path.join(models_dir, "background_removal")], supported_pt_extensions)
-
 folder_names_and_paths["frame_interpolation"] = ([os.path.join(models_dir, "frame_interpolation")], supported_pt_extensions)

 folder_names_and_paths["optical_flow"] = ([os.path.join(models_dir, "optical_flow")], supported_pt_extensions)
--- a/models/background_removal/put_background_removal_models_here
+++ b/models/background_removal/put_background_removal_models_here
--- a/nodes.py
+++ b/nodes.py
@ -2427,7 +2427,6 @@ async def init_builtin_extra_nodes():
        "nodes_number_convert.py",
        "nodes_painter.py",
        "nodes_curve.py",
-        "nodes_bg_removal.py",
        "nodes_rtdetr.py",
        "nodes_frame_interpolation.py",
        "nodes_sam3.py",
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 comfyui-frontend-package==1.43.17
-comfyui-workflow-templates==0.9.72
+comfyui-workflow-templates==0.9.69
 comfyui-embedded-docs==0.4.4
 torch
 torchsde