Compare commits

...

6 Commits

Author SHA1 Message Date
yt-koike
14a9e27551
Merge b65c1b1580 into 85fc35e8fa 2026-02-03 09:20:01 -08:00
comfyanonymous
85fc35e8fa
Fix mac issue. (#12250)
2026-02-03 12:19:39 -05:00
comfyanonymous
223364743c
llama: cast logits as a comfy-weight (#12248)
This is using a different layer's weight with .to(). Change it to use
the ops caster if the original layer is a comfy weight so that it picks
up dynamic_vram and async_offload functionality in full.

Co-authored-by: Rattus <rattus128@gmail.com>
2026-02-03 11:31:36 -05:00
comfyanonymous
affe881354
Fix some issues with mac. (#12247) 2026-02-03 11:07:04 -05:00
yt-koike
b65c1b1580 replace print() with logging.error() 2026-01-30 12:14:28 +09:00
yt-koike
389b3325d1 add model downloader endpoint 2026-01-30 11:41:07 +09:00
4 changed files with 64 additions and 8 deletions

View File

@@ -5,8 +5,10 @@ import base64
 import json
 import time
 import logging
+import requests
 import folder_paths
 import glob
+from tqdm.auto import tqdm
 import comfy.utils
 from aiohttp import web
 from PIL import Image
@@ -15,8 +17,9 @@ from folder_paths import map_legacy, filter_files_extensions, filter_files_conte
 class ModelFileManager:
-    def __init__(self) -> None:
+    def __init__(self, is_download_model_enabled: lambda: bool= lambda: False) -> None:
         self.cache: dict[str, tuple[list[dict], dict[str, float], float]] = {}
+        self.is_download_model_enabled = is_download_model_enabled
 
     def get_cache(self, key: str, default=None) -> tuple[list[dict], dict[str, float], float] | None:
         return self.cache.get(key, default)
@@ -76,6 +79,45 @@ class ModelFileManager:
             except:
                 return web.Response(status=404)
 
+        @routes.post("/download_model")
+        async def post_download_model(request):
+            if not self.is_download_model_enabled():
+                logging.error("Download Model endpoint is disabled")
+                return web.Response(status=403)
+            json_data = await request.json()
+            url = json_data.get("url", None)
+            if url is None:
+                logging.error("URL is not provided")
+                return web.Response(status=401)
+            save_dir = json_data.get("save_dir", None)
+            if save_dir not in folder_paths.folder_names_and_paths:
+                logging.error("Save directory is not valid")
+                return web.Response(status=401)
+            filename = json_data.get("filename", url.split("/")[-1])
+            token = json_data.get("token", None)
+            save_path = os.path.join(folder_paths.folder_names_and_paths[save_dir][0][0], filename)
+            tmp_path = save_path + ".tmp"
+            headers = {"Authorization": f"Bearer {token}"} if token else {}
+            try:
+                with requests.get(url, headers=headers,stream=True,timeout=10) as r:
+                    r.raise_for_status()
+                    total_size = int(r.headers.get('content-length', 0))
+                    with open(tmp_path, "wb") as f:
+                        with tqdm(total=total_size, unit='iB', unit_scale=True, desc=filename) as pbar:
+                            for chunk in r.iter_content(chunk_size=1024*1024):
+                                if not chunk:
+                                    break
+                                size = f.write(chunk)
+                                pbar.update(size)
+                os.rename(tmp_path, save_path)
+                return web.Response(status=200)
+            except Exception as e:
+                logging.error(f"Failed to download model: {e}")
+                if os.path.exists(tmp_path):
+                    os.remove(tmp_path)
+                return web.Response(status=500)
+
     def get_model_file_list(self, folder_name: str):
         folder_name = map_legacy(folder_name)
         folders = folder_paths.folder_names_and_paths[folder_name]
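
For reference, a client call against the new endpoint might look like the sketch below. This is a hypothetical example, not part of the change: it assumes a local server at 127.0.0.1:8188 with the Comfy.ModelDownloadEnabled setting turned on, and the url, save_dir and filename values are placeholders.

# Hypothetical client for the POST /download_model route added above.
# Assumes a local ComfyUI server at 127.0.0.1:8188 and Comfy.ModelDownloadEnabled enabled.
import requests

payload = {
    "url": "https://example.com/models/example.safetensors",  # placeholder download URL
    "save_dir": "checkpoints",          # must be a key in folder_paths.folder_names_and_paths
    "filename": "example.safetensors",  # optional; defaults to the last URL segment
    # "token": "...",                   # optional; sent as a Bearer Authorization header
}

# The handler only replies after the whole file is written, so this call blocks
# for the duration of the download.
resp = requests.post("http://127.0.0.1:8188/download_model", json=payload)
print(resp.status_code)  # 200 on success, 403 if disabled, 401 on bad input, 500 on failure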

View File

@@ -57,8 +57,9 @@ def sample_manual_loop_no_classes(
         if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
             eos_score = cfg_logits[:, eos_token_id].clone()
 
+        remove_logit_value = torch.finfo(cfg_logits.dtype).min
         # Only generate audio tokens
-        cfg_logits[:, :audio_start_id] = float('-inf')
+        cfg_logits[:, :audio_start_id] = remove_logit_value
 
         if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
             cfg_logits[:, eos_token_id] = eos_score
@@ -66,7 +67,7 @@ def sample_manual_loop_no_classes(
         if top_k is not None and top_k > 0:
             top_k_vals, _ = torch.topk(cfg_logits, top_k)
             min_val = top_k_vals[..., -1, None]
-            cfg_logits[cfg_logits < min_val] = float('-inf')
+            cfg_logits[cfg_logits < min_val] = remove_logit_value
 
         if top_p is not None and top_p < 1.0:
             sorted_logits, sorted_indices = torch.sort(cfg_logits, descending=True)
@@ -75,7 +76,7 @@ def sample_manual_loop_no_classes(
             sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
             sorted_indices_to_remove[..., 0] = 0
             indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
-            cfg_logits[indices_to_remove] = float('-inf')
+            cfg_logits[indices_to_remove] = remove_logit_value
 
         if temperature > 0:
             cfg_logits = cfg_logits / temperature
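
The edits in this file replace float('-inf') with the dtype's finite minimum as the masking sentinel. A minimal standalone sketch of that pattern, with illustrative shapes and values rather than the model's actual sampling loop:

# Standalone sketch: mask filtered logits with the dtype's finite minimum
# instead of float('-inf'). Shapes and values are illustrative only.
import torch

logits = torch.randn(1, 8, dtype=torch.float16)
remove_logit_value = torch.finfo(logits.dtype).min  # finite stand-in for -inf (about -65504 in fp16)

top_k = 3
top_k_vals, _ = torch.topk(logits, top_k)
logits[logits < top_k_vals[..., -1, None]] = remove_logit_value

probs = torch.softmax(logits.float(), dim=-1)  # masked entries end up with ~0 probability
print(probs)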

View File

@@ -6,6 +6,7 @@ import math
 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
+import comfy.ops
 import comfy.ldm.common_dit
 import comfy.clip_model
@@ -627,10 +628,10 @@ class Llama2_(nn.Module):
         mask = None
         if attention_mask is not None:
             mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1])
-            mask = mask.masked_fill(mask.to(torch.bool), float("-inf"))
+            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min)
 
         if seq_len > 1:
-            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1)
+            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min).triu_(1)
             if mask is not None:
                 mask += causal_mask
             else:
@@ -794,7 +795,19 @@ class Qwen3_2B_ACE15_lm(BaseLlama, torch.nn.Module):
         self.dtype = dtype
 
     def logits(self, x):
-        return torch.nn.functional.linear(x[:, -1:], self.model.embed_tokens.weight.to(x), None)
+        input = x[:, -1:]
+        module = self.model.embed_tokens
+        offload_stream = None
+        if module.comfy_cast_weights:
+            weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True)
+        else:
+            weight = self.model.embed_tokens.weight.to(x)
+        x = torch.nn.functional.linear(input, weight, None)
+        comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
+        return x
 
 class Qwen3_4B(BaseLlama, torch.nn.Module):
     def __init__(self, config_dict, dtype, device, operations):
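
The logits() change above routes the tied embedding weight through the ops caster whenever the layer is a comfy weight, so dynamic_vram and async_offload apply to it. A generalized sketch of that cast/uncast pattern; the comfy.ops.cast_bias_weight and uncast_bias_weight calls are assumed to behave as they are used in the diff, so treat this as an illustration rather than the canonical API:

# Generalized sketch of the cast/uncast pattern from the diff above (illustration only;
# the comfy.ops call signatures are assumed to match their use in the diff).
import torch
import comfy.ops

def linear_with_comfy_cast(module, x):
    offload_stream = None
    if getattr(module, "comfy_cast_weights", False):
        # Route through the ops caster so dynamic_vram / async_offload are honoured.
        weight, _, offload_stream = comfy.ops.cast_bias_weight(module, x, offloadable=True)
    else:
        # Plain dtype/device copy, bypassing the caster.
        weight = module.weight.to(x)
    out = torch.nn.functional.linear(x, weight, None)
    comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
    return out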

View File

@@ -201,7 +201,7 @@ class PromptServer():
         mimetypes.add_type('image/webp', '.webp')
 
         self.user_manager = UserManager()
-        self.model_file_manager = ModelFileManager()
+        self.model_file_manager = ModelFileManager(is_download_model_enabled=lambda: self.user_manager.settings.get_settings(None).get("Comfy.ModelDownloadEnabled", False))
         self.custom_node_manager = CustomNodeManager()
         self.subgraph_manager = SubgraphManager()
         self.internal_routes = InternalRoutes(self)