Compare commits

...

6 Commits

Author SHA1 Message Date
yt-koike
14a9e27551
Merge b65c1b1580 into 85fc35e8fa 2026-02-03 09:20:01 -08:00
comfyanonymous
85fc35e8fa
Fix mac issue. (#12250)
2026-02-03 12:19:39 -05:00
comfyanonymous
223364743c
llama: cast logits as a comfy-weight (#12248)
This is using a different layer's weight with .to(). Change it to use
the ops caster if the original layer is a comfy weight so that it picks
up dynamic_vram and async_offload functionality in full.

Co-authored-by: Rattus <rattus128@gmail.com>
2026-02-03 11:31:36 -05:00
comfyanonymous
affe881354
Fix some issues with mac. (#12247) 2026-02-03 11:07:04 -05:00
yt-koike
b65c1b1580 replace print() with logging.error() 2026-01-30 12:14:28 +09:00
yt-koike
389b3325d1 add model downloader endpoint 2026-01-30 11:41:07 +09:00
4 changed files with 64 additions and 8 deletions

View File

@@ -5,8 +5,10 @@ import base64
 import json
 import time
 import logging
+import requests
 import folder_paths
 import glob
+from tqdm.auto import tqdm
 import comfy.utils
 from aiohttp import web
 from PIL import Image
@@ -15,8 +17,9 @@ from folder_paths import map_legacy, filter_files_extensions, filter_files_conte
 class ModelFileManager:
-    def __init__(self) -> None:
+    def __init__(self, is_download_model_enabled: lambda: bool= lambda: False) -> None:
         self.cache: dict[str, tuple[list[dict], dict[str, float], float]] = {}
+        self.is_download_model_enabled = is_download_model_enabled
 
     def get_cache(self, key: str, default=None) -> tuple[list[dict], dict[str, float], float] | None:
         return self.cache.get(key, default)
@@ -76,6 +79,45 @@ class ModelFileManager:
             except:
                 return web.Response(status=404)
 
+        @routes.post("/download_model")
+        async def post_download_model(request):
+            if not self.is_download_model_enabled():
+                logging.error("Download Model endpoint is disabled")
+                return web.Response(status=403)
+            json_data = await request.json()
+            url = json_data.get("url", None)
+            if url is None:
+                logging.error("URL is not provided")
+                return web.Response(status=401)
+            save_dir = json_data.get("save_dir", None)
+            if save_dir not in folder_paths.folder_names_and_paths:
+                logging.error("Save directory is not valid")
+                return web.Response(status=401)
+            filename = json_data.get("filename", url.split("/")[-1])
+            token = json_data.get("token", None)
+            save_path = os.path.join(folder_paths.folder_names_and_paths[save_dir][0][0], filename)
+            tmp_path = save_path + ".tmp"
+            headers = {"Authorization": f"Bearer {token}"} if token else {}
+            try:
+                with requests.get(url, headers=headers,stream=True,timeout=10) as r:
+                    r.raise_for_status()
+                    total_size = int(r.headers.get('content-length', 0))
+                    with open(tmp_path, "wb") as f:
+                        with tqdm(total=total_size, unit='iB', unit_scale=True, desc=filename) as pbar:
+                            for chunk in r.iter_content(chunk_size=1024*1024):
+                                if not chunk:
+                                    break
+                                size = f.write(chunk)
+                                pbar.update(size)
+                os.rename(tmp_path, save_path)
+                return web.Response(status=200)
+            except Exception as e:
+                logging.error(f"Failed to download model: {e}")
+                if os.path.exists(tmp_path):
+                    os.remove(tmp_path)
+                return web.Response(status=500)
+
     def get_model_file_list(self, folder_name: str):
         folder_name = map_legacy(folder_name)
         folders = folder_paths.folder_names_and_paths[folder_name]
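
For reference, a client call against the new endpoint might look like the sketch below. This is a hypothetical example, not part of the change: it assumes a local server at 127.0.0.1:8188 with the Comfy.ModelDownloadEnabled setting turned on, and the url, save_dir and filename values are placeholders.

# Hypothetical client for the POST /download_model route added above.
# Assumes a local ComfyUI server at 127.0.0.1:8188 and Comfy.ModelDownloadEnabled enabled.
import requests

payload = {
    "url": "https://example.com/models/example.safetensors",  # placeholder download URL
    "save_dir": "checkpoints",          # must be a key in folder_paths.folder_names_and_paths
    "filename": "example.safetensors",  # optional; defaults to the last URL segment
    # "token": "...",                   # optional; sent as a Bearer Authorization header
}

# The handler only replies after the whole file is written, so this call blocks
# for the duration of the download.
resp = requests.post("http://127.0.0.1:8188/download_model", json=payload)
print(resp.status_code)  # 200 on success, 403 if disabled, 401 on bad input, 500 on failure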

View File

@@ -57,8 +57,9 @@ def sample_manual_loop_no_classes(
         if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
             eos_score = cfg_logits[:, eos_token_id].clone()
 
+        remove_logit_value = torch.finfo(cfg_logits.dtype).min
         # Only generate audio tokens
-        cfg_logits[:, :audio_start_id] = float('-inf')
+        cfg_logits[:, :audio_start_id] = remove_logit_value
 
         if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
             cfg_logits[:, eos_token_id] = eos_score
@@ -66,7 +67,7 @@ def sample_manual_loop_no_classes(
         if top_k is not None and top_k > 0:
             top_k_vals, _ = torch.topk(cfg_logits, top_k)
             min_val = top_k_vals[..., -1, None]
-            cfg_logits[cfg_logits < min_val] = float('-inf')
+            cfg_logits[cfg_logits < min_val] = remove_logit_value
 
         if top_p is not None and top_p < 1.0:
             sorted_logits, sorted_indices = torch.sort(cfg_logits, descending=True)
@@ -75,7 +76,7 @@ def sample_manual_loop_no_classes(
             sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
             sorted_indices_to_remove[..., 0] = 0
             indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
-            cfg_logits[indices_to_remove] = float('-inf')
+            cfg_logits[indices_to_remove] = remove_logit_value
 
         if temperature > 0:
             cfg_logits = cfg_logits / temperature
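
The edits in this file replace float('-inf') with the dtype's finite minimum as the masking sentinel. A minimal standalone sketch of that pattern, with illustrative shapes and values rather than the model's actual sampling loop:

# Standalone sketch: mask filtered logits with the dtype's finite minimum
# instead of float('-inf'). Shapes and values are illustrative only.
import torch

logits = torch.randn(1, 8, dtype=torch.float16)
remove_logit_value = torch.finfo(logits.dtype).min  # finite stand-in for -inf (about -65504 in fp16)

top_k = 3
top_k_vals, _ = torch.topk(logits, top_k)
logits[logits < top_k_vals[..., -1, None]] = remove_logit_value

probs = torch.softmax(logits.float(), dim=-1)  # masked entries end up with ~0 probability
print(probs)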

View File

@@ -6,6 +6,7 @@ import math
 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
+import comfy.ops
 import comfy.ldm.common_dit
 import comfy.clip_model
@@ -627,10 +628,10 @@ class Llama2_(nn.Module):
         mask = None
         if attention_mask is not None:
             mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1])
-            mask = mask.masked_fill(mask.to(torch.bool), float("-inf"))
+            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min)
 
         if seq_len > 1:
-            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1)
+            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min).triu_(1)
             if mask is not None:
                 mask += causal_mask
             else:
@@ -794,7 +795,19 @@ class Qwen3_2B_ACE15_lm(BaseLlama, torch.nn.Module):
         self.dtype = dtype
 
     def logits(self, x):
-        return torch.nn.functional.linear(x[:, -1:], self.model.embed_tokens.weight.to(x), None)
+        input = x[:, -1:]
+        module = self.model.embed_tokens
+        offload_stream = None
+        if module.comfy_cast_weights:
+            weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True)
+        else:
+            weight = self.model.embed_tokens.weight.to(x)
+        x = torch.nn.functional.linear(input, weight, None)
+        comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
+        return x
 
 class Qwen3_4B(BaseLlama, torch.nn.Module):
     def __init__(self, config_dict, dtype, device, operations):
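
The logits() change above routes the tied embedding weight through the ops caster whenever the layer is a comfy weight, so dynamic_vram and async_offload apply to it. A generalized sketch of that cast/uncast pattern; the comfy.ops.cast_bias_weight and uncast_bias_weight calls are assumed to behave as they are used in the diff, so treat this as an illustration rather than the canonical API:

# Generalized sketch of the cast/uncast pattern from the diff above (illustration only;
# the comfy.ops call signatures are assumed to match their use in the diff).
import torch
import comfy.ops

def linear_with_comfy_cast(module, x):
    offload_stream = None
    if getattr(module, "comfy_cast_weights", False):
        # Route through the ops caster so dynamic_vram / async_offload are honoured.
        weight, _, offload_stream = comfy.ops.cast_bias_weight(module, x, offloadable=True)
    else:
        # Plain dtype/device copy, bypassing the caster.
        weight = module.weight.to(x)
    out = torch.nn.functional.linear(x, weight, None)
    comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
    return out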

View File

@@ -201,7 +201,7 @@ class PromptServer():
         mimetypes.add_type('image/webp', '.webp')
 
         self.user_manager = UserManager()
-        self.model_file_manager = ModelFileManager()
+        self.model_file_manager = ModelFileManager(is_download_model_enabled=lambda: self.user_manager.settings.get_settings(None).get("Comfy.ModelDownloadEnabled", False))
         self.custom_node_manager = CustomNodeManager()
         self.subgraph_manager = SubgraphManager()
         self.internal_routes = InternalRoutes(self)