Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-02-05 19:12:41 +08:00)

Compare commits: 21 commits, ed22965190 ... 3d9bc9ea5f

Commits: 3d9bc9ea5f, abe2ec26a6, bdeac8897e, 7602203696, ffa7a369ba, 7ec1656735, cee75f301a, 1a59686ca8, 6d96d26795, e07a32c9b8, a19f0a88e4, 64811809a0, 529109083a, a7be9f6fc3, 6075c44ec8, 154b73835a, 862e7784f4, f6b6636bf3, 83b00df3f0, 5f24eb699c, fab0954077
@ -404,6 +404,14 @@ Use `--tls-keyfile key.pem --tls-certfile cert.pem` to enable TLS/SSL, the app w
> Note: Windows users can use [alexisrolland/docker-openssl](https://github.com/alexisrolland/docker-openssl) or one of the [3rd party binary distributions](https://wiki.openssl.org/index.php/Binaries) to run the command example above.
<br/><br/>If you use a container, note that the volume mount `-v` can be a relative path, so `... -v ".\:/openssl-certs" ...` would create the key & cert files in the current directory of your command prompt or PowerShell terminal.

## How to run heavy workflows on a mid-range GPU (NVIDIA, Linux)?

Use the `--enable-gds` flag to activate NVIDIA [GPUDirect Storage](https://docs.nvidia.com/gpudirect-storage/) (GDS), which allows data to be transferred directly between SSDs and GPUs. This eliminates the traditional CPU-mediated data path, significantly reducing I/O latency and CPU overhead. System RAM is still used for caching, alongside the SSD, to further optimize performance.

This feature has been tested only on NVIDIA GPUs on Linux-based systems.

Requires: `cupy-cuda12x>=12.0.0`, `pynvml>=11.4.1`, `cudf>=23.0.0`, `numba>=0.57.0`, `nvidia-ml-py>=12.0.0`.
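For example, an illustrative launch using the flags defined in `comfy/cli_args.py` below (the file size and stats options are placeholders, adjust them to your setup): `python main.py --enable-gds --gds-min-file-size 200 --gds-stats`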
## Support and dev channel

[Discord](https://comfy.org/discord): Try the #help or #feedback channels.
@ -154,6 +154,17 @@ parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha
parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--deterministic", action="store_true", help="Make pytorch use slower deterministic algorithms when it can. Note that this might not make images deterministic in all cases.")

# GPUDirect Storage (GDS) arguments
gds_group = parser.add_argument_group('gds', 'GPUDirect Storage options for direct SSD-to-GPU model loading')
gds_group.add_argument("--enable-gds", action="store_true", help="Enable GPUDirect Storage for direct SSD-to-GPU model loading (requires CUDA 11.4+, cuFile).")
gds_group.add_argument("--disable-gds", action="store_true", help="Explicitly disable GPUDirect Storage.")
gds_group.add_argument("--gds-min-file-size", type=int, default=100, help="Minimum file size in MB to use GDS (default: 100MB).")
gds_group.add_argument("--gds-chunk-size", type=int, default=64, help="GDS transfer chunk size in MB (default: 64MB).")
gds_group.add_argument("--gds-streams", type=int, default=4, help="Number of CUDA streams for GDS operations (default: 4).")
gds_group.add_argument("--gds-prefetch", action="store_true", help="Enable GDS prefetching for better performance.")
gds_group.add_argument("--gds-no-fallback", action="store_true", help="Disable fallback to CPU loading if GDS fails.")
gds_group.add_argument("--gds-stats", action="store_true", help="Print GDS statistics on exit.")

class PerformanceFeature(enum.Enum):
    Fp16Accumulation = "fp16_accumulation"
    Fp8MatrixMultiplication = "fp8_matrix_mult"
comfy/gds_loader.py (new file, 494 lines)
@ -0,0 +1,494 @@
# copyright 2025 Maifee Ul Asad @ github.com/maifeeulasad
# copyright under GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007

"""
GPUDirect Storage (GDS) Integration for ComfyUI
Direct SSD-to-GPU model loading without RAM/CPU bottlenecks
Still there will be some CPU/RAM usage, mostly for safetensors parsing and small buffers.

This module provides GPUDirect Storage functionality to load models directly
from NVMe SSDs to GPU memory, bypassing system RAM and CPU.
"""

import os
import logging
import torch
import time
from typing import Optional, Dict, Any, Union
from pathlib import Path
import safetensors
import gc
import mmap
from dataclasses import dataclass

try:
    import cupy
    import cupy.cuda.runtime as cuda_runtime
    CUPY_AVAILABLE = True
except ImportError:
    CUPY_AVAILABLE = False
    logging.warning("CuPy not available. GDS will use fallback mode.")

try:
    import cudf  # RAPIDS for GPU dataframes
    RAPIDS_AVAILABLE = True
except ImportError:
    RAPIDS_AVAILABLE = False

try:
    import pynvml
    pynvml.nvmlInit()
    NVML_AVAILABLE = True
except ImportError:
    NVML_AVAILABLE = False
    logging.warning("NVIDIA-ML-Py not available. GPU monitoring disabled.")

@dataclass
class GDSConfig:
    """Configuration for GPUDirect Storage"""
    enabled: bool = True
    min_file_size_mb: int = 100  # Only use GDS for files larger than this
    chunk_size_mb: int = 64  # Size of chunks to transfer
    use_pinned_memory: bool = True
    prefetch_enabled: bool = True
    compression_aware: bool = True
    max_concurrent_streams: int = 4
    fallback_to_cpu: bool = True
    show_stats: bool = False  # Whether to show stats on exit


class GDSError(Exception):
    """GDS-specific errors"""
    pass


class GPUDirectStorage:
    """
    GPUDirect Storage implementation for ComfyUI
    Enables direct SSD-to-GPU transfers for model loading
    """

    def __init__(self, config: Optional[GDSConfig] = None):
        self.config = config or GDSConfig()
        self.device = torch.cuda.current_device() if torch.cuda.is_available() else None
        self.cuda_streams = []
        self.pinned_buffers = {}
        self.stats = {
            'gds_loads': 0,
            'fallback_loads': 0,
            'total_bytes_gds': 0,
            'total_time_gds': 0.0,
            'avg_bandwidth_gbps': 0.0
        }

        # Initialize GDS if available
        self._gds_available = self._check_gds_availability()
        if self._gds_available:
            self._init_gds()
        else:
            logging.warning("GDS not available, using fallback methods")

    def _check_gds_availability(self) -> bool:
        """Check if GDS is available on the system"""
        if not torch.cuda.is_available():
            return False

        if not CUPY_AVAILABLE:
            return False

        # Check for GPUDirect Storage support
        try:
            # Check CUDA version (GDS requires CUDA 11.4+)
            cuda_version = torch.version.cuda
            if cuda_version:
                major, minor = map(int, cuda_version.split('.')[:2])
                if major < 11 or (major == 11 and minor < 4):
                    logging.warning(f"CUDA {cuda_version} detected. GDS requires CUDA 11.4+")
                    return False

            # Check if cuFile is available (part of CUDA toolkit)
            try:
                import cupy.cuda.cufile as cufile
                # Try to initialize cuFile
                cufile.initialize()
                return True
            except (ImportError, RuntimeError) as e:
                logging.warning(f"cuFile not available: {e}")
                return False

        except Exception as e:
            logging.warning(f"GDS availability check failed: {e}")
            return False

    def _init_gds(self):
        """Initialize GDS resources"""
        try:
            # Create CUDA streams for async operations
            for i in range(self.config.max_concurrent_streams):
                stream = torch.cuda.Stream()
                self.cuda_streams.append(stream)

            # Pre-allocate pinned memory buffers
            if self.config.use_pinned_memory:
                self._allocate_pinned_buffers()

            logging.info(f"GDS initialized with {len(self.cuda_streams)} streams")

        except Exception as e:
            logging.error(f"Failed to initialize GDS: {e}")
            self._gds_available = False

    def _allocate_pinned_buffers(self):
        """Pre-allocate pinned memory buffers for staging"""
        try:
            # Allocate buffers of different sizes
            buffer_sizes = [16, 32, 64, 128, 256]  # MB

            for size_mb in buffer_sizes:
                size_bytes = size_mb * 1024 * 1024
                # Allocate pinned memory using CuPy
                if CUPY_AVAILABLE:
                    buffer = cupy.cuda.alloc_pinned_memory(size_bytes)
                    self.pinned_buffers[size_mb] = buffer

        except Exception as e:
            logging.warning(f"Failed to allocate pinned buffers: {e}")

    def _get_file_size(self, file_path: str) -> int:
        """Get file size in bytes"""
        return os.path.getsize(file_path)

    def _should_use_gds(self, file_path: str) -> bool:
        """Determine if GDS should be used for this file"""
        if not self._gds_available or not self.config.enabled:
            return False

        file_size_mb = self._get_file_size(file_path) / (1024 * 1024)
        return file_size_mb >= self.config.min_file_size_mb

    def _load_with_gds(self, file_path: str) -> Dict[str, torch.Tensor]:
        """Load model using GPUDirect Storage"""
        start_time = time.time()

        try:
            if file_path.lower().endswith(('.safetensors', '.sft')):
                return self._load_safetensors_gds(file_path)
            else:
                return self._load_pytorch_gds(file_path)

        except Exception as e:
            logging.error(f"GDS loading failed for {file_path}: {e}")
            if self.config.fallback_to_cpu:
                logging.info("Falling back to CPU loading")
                self.stats['fallback_loads'] += 1
                return self._load_fallback(file_path)
            else:
                raise GDSError(f"GDS loading failed: {e}")
        finally:
            load_time = time.time() - start_time
            self.stats['total_time_gds'] += load_time

    def _load_safetensors_gds(self, file_path: str) -> Dict[str, torch.Tensor]:
        """Load safetensors file using GDS"""
        try:
            import cupy.cuda.cufile as cufile

            # Open file with cuFile for direct GPU loading
            with cufile.CuFileManager() as manager:
                # Memory-map the file for efficient access
                with open(file_path, 'rb') as f:
                    # Use mmap for large files
                    with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmapped_file:

                        # Parse safetensors header
                        header_size = int.from_bytes(mmapped_file[:8], 'little')
                        header_bytes = mmapped_file[8:8+header_size]

                        import json
                        header = json.loads(header_bytes.decode('utf-8'))

                        # Load tensors directly to GPU
                        tensors = {}
                        data_offset = 8 + header_size

                        for name, info in header.items():
                            if name == "__metadata__":
                                continue

                            dtype_map = {
                                'F32': torch.float32,
                                'F16': torch.float16,
                                'BF16': torch.bfloat16,
                                'I8': torch.int8,
                                'I16': torch.int16,
                                'I32': torch.int32,
                                'I64': torch.int64,
                                'U8': torch.uint8,
                            }

                            dtype = dtype_map.get(info['dtype'], torch.float32)
                            shape = info['shape']
                            start_offset = data_offset + info['data_offsets'][0]
                            end_offset = data_offset + info['data_offsets'][1]

                            # Direct GPU allocation
                            tensor = torch.empty(shape, dtype=dtype, device=f'cuda:{self.device}')

                            # Use cuFile for direct transfer
                            tensor_bytes = end_offset - start_offset

                            # Get GPU memory pointer
                            gpu_ptr = tensor.data_ptr()

                            # Direct file-to-GPU transfer
                            cufile.copy_from_file(
                                gpu_ptr,
                                mmapped_file[start_offset:end_offset],
                                tensor_bytes
                            )

                            tensors[name] = tensor

            self.stats['gds_loads'] += 1
            self.stats['total_bytes_gds'] += self._get_file_size(file_path)

            return tensors

        except Exception as e:
            logging.error(f"GDS safetensors loading failed: {e}")
            raise

    def _load_pytorch_gds(self, file_path: str) -> Dict[str, torch.Tensor]:
        """Load PyTorch file using GDS with staging"""
        try:
            # For PyTorch files, we need to use a staging approach
            # since torch.load doesn't support direct GPU loading

            # Load to pinned memory first
            with open(file_path, 'rb') as f:
                file_size = self._get_file_size(file_path)

                # Choose appropriate buffer or allocate new one
                buffer_size_mb = min(256, max(64, file_size // (1024 * 1024)))

                if buffer_size_mb in self.pinned_buffers:
                    pinned_buffer = self.pinned_buffers[buffer_size_mb]
                else:
                    # Allocate temporary pinned buffer
                    pinned_buffer = cupy.cuda.alloc_pinned_memory(file_size)

                # Read file to pinned memory
                f.readinto(pinned_buffer)

                # Use torch.load with map_location to specific GPU
                # This will be faster due to pinned memory
                state_dict = torch.load(
                    f,
                    map_location=f'cuda:{self.device}',
                    weights_only=True
                )

            self.stats['gds_loads'] += 1
            self.stats['total_bytes_gds'] += file_size

            return state_dict

        except Exception as e:
            logging.error(f"GDS PyTorch loading failed: {e}")
            raise

    def _load_fallback(self, file_path: str) -> Dict[str, torch.Tensor]:
        """Fallback loading method using standard approaches"""
        if file_path.lower().endswith(('.safetensors', '.sft')):
            # Use safetensors with device parameter
            with safetensors.safe_open(file_path, framework="pt", device=f'cuda:{self.device}') as f:
                return {k: f.get_tensor(k) for k in f.keys()}
        else:
            # Standard PyTorch loading
            return torch.load(file_path, map_location=f'cuda:{self.device}', weights_only=True)

    def load_model(self, file_path: str, device: Optional[torch.device] = None) -> Dict[str, torch.Tensor]:
        """
        Main entry point for loading models with GDS

        Args:
            file_path: Path to the model file
            device: Target device (if None, uses current CUDA device)

        Returns:
            Dictionary of tensors loaded directly to GPU
        """
        if device is not None and device.type == 'cuda':
            self.device = device.index or 0

        if self._should_use_gds(file_path):
            logging.info(f"Loading {file_path} with GDS")
            return self._load_with_gds(file_path)
        else:
            logging.info(f"Loading {file_path} with standard method")
            self.stats['fallback_loads'] += 1
            return self._load_fallback(file_path)

    def prefetch_model(self, file_path: str) -> bool:
        """
        Prefetch model to GPU memory cache (if supported)

        Args:
            file_path: Path to the model file

        Returns:
            True if prefetch was successful
        """
        if not self.config.prefetch_enabled or not self._gds_available:
            return False

        try:
            # Basic prefetch implementation
            # This would ideally use NVIDIA's GPUDirect Storage API
            # to warm up the storage cache

            file_size = self._get_file_size(file_path)
            logging.info(f"Prefetching {file_path} ({file_size // (1024*1024)} MB)")

            # Read file metadata to warm caches
            with open(file_path, 'rb') as f:
                # Read first and last chunks to trigger prefetch
                f.read(1024 * 1024)  # First 1MB
                f.seek(-min(1024 * 1024, file_size), 2)  # Last 1MB
                f.read()

            return True

        except Exception as e:
            logging.warning(f"Prefetch failed for {file_path}: {e}")
            return False

    def get_stats(self) -> Dict[str, Any]:
        """Get loading statistics"""
        total_loads = self.stats['gds_loads'] + self.stats['fallback_loads']

        if self.stats['total_time_gds'] > 0 and self.stats['total_bytes_gds'] > 0:
            bandwidth_gbps = (self.stats['total_bytes_gds'] / (1024**3)) / self.stats['total_time_gds']
            self.stats['avg_bandwidth_gbps'] = bandwidth_gbps

        return {
            **self.stats,
            'total_loads': total_loads,
            'gds_usage_percent': (self.stats['gds_loads'] / max(1, total_loads)) * 100,
            'gds_available': self._gds_available,
            'config': self.config.__dict__
        }

    def cleanup(self):
        """Clean up GDS resources"""
        try:
            # Clear CUDA streams
            for stream in self.cuda_streams:
                stream.synchronize()
            self.cuda_streams.clear()

            # Free pinned buffers
            for buffer in self.pinned_buffers.values():
                if CUPY_AVAILABLE:
                    cupy.cuda.free_pinned_memory(buffer)
            self.pinned_buffers.clear()

            # Force garbage collection
            gc.collect()
            torch.cuda.empty_cache()

        except Exception as e:
            logging.warning(f"GDS cleanup failed: {e}")

    def __del__(self):
        """Destructor to ensure cleanup"""
        self.cleanup()


# Global GDS instance
_gds_instance: Optional[GPUDirectStorage] = None


def get_gds_instance(config: Optional[GDSConfig] = None) -> GPUDirectStorage:
    """Get or create the global GDS instance"""
    global _gds_instance

    if _gds_instance is None:
        _gds_instance = GPUDirectStorage(config)

    return _gds_instance


def load_torch_file_gds(ckpt: str, safe_load: bool = False, device: Optional[torch.device] = None) -> Dict[str, torch.Tensor]:
    """
    GDS-enabled replacement for comfy.utils.load_torch_file

    Args:
        ckpt: Path to checkpoint file
        safe_load: Whether to use safe loading (for compatibility)
        device: Target device

    Returns:
        Dictionary of loaded tensors
    """
    gds = get_gds_instance()

    try:
        # Load with GDS
        return gds.load_model(ckpt, device)

    except Exception as e:
        logging.error(f"GDS loading failed, falling back to standard method: {e}")
        # Fallback to original method
        import comfy.utils
        return comfy.utils.load_torch_file(ckpt, safe_load=safe_load, device=device)


def prefetch_model_gds(file_path: str) -> bool:
    """Prefetch model for faster loading"""
    gds = get_gds_instance()
    return gds.prefetch_model(file_path)


def get_gds_stats() -> Dict[str, Any]:
    """Get GDS statistics"""
    gds = get_gds_instance()
    return gds.get_stats()


def configure_gds(config: GDSConfig):
    """Configure GDS settings"""
    global _gds_instance
    _gds_instance = GPUDirectStorage(config)


def init_gds(config: GDSConfig):
    """
    Initialize GPUDirect Storage with the provided configuration

    Args:
        config: GDSConfig object with initialization parameters
    """
    try:
        # Configure GDS
        configure_gds(config)
        logging.info(f"GDS initialized: enabled={config.enabled}, min_size={config.min_file_size_mb}MB, streams={config.max_concurrent_streams}")

        # Set up exit handler for stats if requested
        if hasattr(config, 'show_stats') and config.show_stats:
            import atexit
            def print_gds_stats():
                stats = get_gds_stats()
                logging.info("=== GDS Statistics ===")
                logging.info(f"Total loads: {stats['total_loads']}")
                logging.info(f"GDS loads: {stats['gds_loads']} ({stats['gds_usage_percent']:.1f}%)")
                logging.info(f"Fallback loads: {stats['fallback_loads']}")
                logging.info(f"Total bytes via GDS: {stats['total_bytes_gds'] / (1024**3):.2f} GB")
                logging.info(f"Average bandwidth: {stats['avg_bandwidth_gbps']:.2f} GB/s")
                logging.info("===================")
            atexit.register(print_gds_stats)

    except ImportError as e:
        logging.warning(f"GDS initialization failed - missing dependencies: {e}")
    except Exception as e:
        logging.error(f"GDS initialization failed: {e}")
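The module above exposes a small public surface (`init_gds`, `load_torch_file_gds`, `prefetch_model_gds`, `get_gds_stats`). A minimal usage sketch, assuming a ComfyUI checkout on the import path, a CUDA-capable machine, and a placeholder checkpoint path:

```python
import torch
from comfy.gds_loader import GDSConfig, init_gds, load_torch_file_gds, get_gds_stats

# Hypothetical settings; this mirrors what main.py builds from the --gds-* flags.
init_gds(GDSConfig(enabled=True, min_file_size_mb=200, show_stats=False))

# Falls back to comfy.utils.load_torch_file if GDS is unavailable or fails.
sd = load_torch_file_gds("/path/to/model.safetensors", device=torch.device("cuda:0"))
print(get_gds_stats()["gds_loads"], "files loaded via GDS")
```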
comfy/ldm/anima/model.py (new file, 202 lines)
@ -0,0 +1,202 @@
from comfy.ldm.cosmos.predict2 import MiniTrainDIT
import torch
from torch import nn
import torch.nn.functional as F


def rotate_half(x):
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)


def apply_rotary_pos_emb(x, cos, sin, unsqueeze_dim=1):
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    x_embed = (x * cos) + (rotate_half(x) * sin)
    return x_embed


class RotaryEmbedding(nn.Module):
    def __init__(self, head_dim):
        super().__init__()
        self.rope_theta = 10000
        inv_freq = 1.0 / (self.rope_theta ** (torch.arange(0, head_dim, 2, dtype=torch.int64).to(dtype=torch.float) / head_dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)

    @torch.no_grad()
    def forward(self, x, position_ids):
        inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
        position_ids_expanded = position_ids[:, None, :].float()

        device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
            freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
            emb = torch.cat((freqs, freqs), dim=-1)
            cos = emb.cos()
            sin = emb.sin()

        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)


class Attention(nn.Module):
    def __init__(self, query_dim, context_dim, n_heads, head_dim, device=None, dtype=None, operations=None):
        super().__init__()

        inner_dim = head_dim * n_heads
        self.n_heads = n_heads
        self.head_dim = head_dim
        self.query_dim = query_dim
        self.context_dim = context_dim

        self.q_proj = operations.Linear(query_dim, inner_dim, bias=False, device=device, dtype=dtype)
        self.q_norm = operations.RMSNorm(self.head_dim, eps=1e-6, device=device, dtype=dtype)

        self.k_proj = operations.Linear(context_dim, inner_dim, bias=False, device=device, dtype=dtype)
        self.k_norm = operations.RMSNorm(self.head_dim, eps=1e-6, device=device, dtype=dtype)

        self.v_proj = operations.Linear(context_dim, inner_dim, bias=False, device=device, dtype=dtype)

        self.o_proj = operations.Linear(inner_dim, query_dim, bias=False, device=device, dtype=dtype)

    def forward(self, x, mask=None, context=None, position_embeddings=None, position_embeddings_context=None):
        context = x if context is None else context
        input_shape = x.shape[:-1]
        q_shape = (*input_shape, self.n_heads, self.head_dim)
        context_shape = context.shape[:-1]
        kv_shape = (*context_shape, self.n_heads, self.head_dim)

        query_states = self.q_norm(self.q_proj(x).view(q_shape)).transpose(1, 2)
        key_states = self.k_norm(self.k_proj(context).view(kv_shape)).transpose(1, 2)
        value_states = self.v_proj(context).view(kv_shape).transpose(1, 2)

        if position_embeddings is not None:
            assert position_embeddings_context is not None
            cos, sin = position_embeddings
            query_states = apply_rotary_pos_emb(query_states, cos, sin)
            cos, sin = position_embeddings_context
            key_states = apply_rotary_pos_emb(key_states, cos, sin)

        attn_output = F.scaled_dot_product_attention(query_states, key_states, value_states, attn_mask=mask)

        attn_output = attn_output.transpose(1, 2).reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output

    def init_weights(self):
        torch.nn.init.zeros_(self.o_proj.weight)


class TransformerBlock(nn.Module):
    def __init__(self, source_dim, model_dim, num_heads=16, mlp_ratio=4.0, use_self_attn=False, layer_norm=False, device=None, dtype=None, operations=None):
        super().__init__()
        self.use_self_attn = use_self_attn

        if self.use_self_attn:
            self.norm_self_attn = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
            self.self_attn = Attention(
                query_dim=model_dim,
                context_dim=model_dim,
                n_heads=num_heads,
                head_dim=model_dim//num_heads,
                device=device,
                dtype=dtype,
                operations=operations,
            )

        self.norm_cross_attn = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
        self.cross_attn = Attention(
            query_dim=model_dim,
            context_dim=source_dim,
            n_heads=num_heads,
            head_dim=model_dim//num_heads,
            device=device,
            dtype=dtype,
            operations=operations,
        )

        self.norm_mlp = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
        self.mlp = nn.Sequential(
            operations.Linear(model_dim, int(model_dim * mlp_ratio), device=device, dtype=dtype),
            nn.GELU(),
            operations.Linear(int(model_dim * mlp_ratio), model_dim, device=device, dtype=dtype)
        )

    def forward(self, x, context, target_attention_mask=None, source_attention_mask=None, position_embeddings=None, position_embeddings_context=None):
        if self.use_self_attn:
            normed = self.norm_self_attn(x)
            attn_out = self.self_attn(normed, mask=target_attention_mask, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings)
            x = x + attn_out

        normed = self.norm_cross_attn(x)
        attn_out = self.cross_attn(normed, mask=source_attention_mask, context=context, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings_context)
        x = x + attn_out

        x = x + self.mlp(self.norm_mlp(x))
        return x

    def init_weights(self):
        torch.nn.init.zeros_(self.mlp[2].weight)
        self.cross_attn.init_weights()


class LLMAdapter(nn.Module):
    def __init__(
        self,
        source_dim=1024,
        target_dim=1024,
        model_dim=1024,
        num_layers=6,
        num_heads=16,
        use_self_attn=True,
        layer_norm=False,
        device=None,
        dtype=None,
        operations=None,
    ):
        super().__init__()

        self.embed = operations.Embedding(32128, target_dim, device=device, dtype=dtype)
        if model_dim != target_dim:
            self.in_proj = operations.Linear(target_dim, model_dim, device=device, dtype=dtype)
        else:
            self.in_proj = nn.Identity()
        self.rotary_emb = RotaryEmbedding(model_dim//num_heads)
        self.blocks = nn.ModuleList([
            TransformerBlock(source_dim, model_dim, num_heads=num_heads, use_self_attn=use_self_attn, layer_norm=layer_norm, device=device, dtype=dtype, operations=operations) for _ in range(num_layers)
        ])
        self.out_proj = operations.Linear(model_dim, target_dim, device=device, dtype=dtype)
        self.norm = operations.RMSNorm(target_dim, eps=1e-6, device=device, dtype=dtype)

    def forward(self, source_hidden_states, target_input_ids, target_attention_mask=None, source_attention_mask=None):
        if target_attention_mask is not None:
            target_attention_mask = target_attention_mask.to(torch.bool)
            if target_attention_mask.ndim == 2:
                target_attention_mask = target_attention_mask.unsqueeze(1).unsqueeze(1)

        if source_attention_mask is not None:
            source_attention_mask = source_attention_mask.to(torch.bool)
            if source_attention_mask.ndim == 2:
                source_attention_mask = source_attention_mask.unsqueeze(1).unsqueeze(1)

        x = self.in_proj(self.embed(target_input_ids))
        context = source_hidden_states
        position_ids = torch.arange(x.shape[1], device=x.device).unsqueeze(0)
        position_ids_context = torch.arange(context.shape[1], device=x.device).unsqueeze(0)
        position_embeddings = self.rotary_emb(x, position_ids)
        position_embeddings_context = self.rotary_emb(x, position_ids_context)
        for block in self.blocks:
            x = block(x, context, target_attention_mask=target_attention_mask, source_attention_mask=source_attention_mask, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings_context)
        return self.norm(self.out_proj(x))


class Anima(MiniTrainDIT):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.llm_adapter = LLMAdapter(device=kwargs.get("device"), dtype=kwargs.get("dtype"), operations=kwargs.get("operations"))

    def preprocess_text_embeds(self, text_embeds, text_ids):
        if text_ids is not None:
            return self.llm_adapter(text_embeds, text_ids)
        else:
            return text_embeds
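To make the adapter's data flow concrete, here is a minimal shape-level sketch. It assumes a ComfyUI checkout (for the `comfy.ldm.cosmos` import at the top of the file) and passes `torch.nn` for `operations`, which ComfyUI would normally replace with its `comfy.ops` classes; all sizes are illustrative.

```python
import torch
from torch import nn
from comfy.ldm.anima.model import LLMAdapter

# operations=nn is a stand-in for comfy.ops; dims match the defaults above.
adapter = LLMAdapter(source_dim=1024, target_dim=1024, model_dim=1024,
                     num_layers=2, num_heads=16, operations=nn,
                     device="cpu", dtype=torch.float32)

qwen_hidden = torch.randn(1, 40, 1024)        # source_hidden_states (cross-attn context)
t5_ids = torch.randint(0, 32128, (1, 32))     # target_input_ids (T5 vocab size 32128)
out = adapter(qwen_hidden, t5_ids)
print(out.shape)                              # torch.Size([1, 32, 1024])
```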
@ -49,6 +49,7 @@ import comfy.ldm.ace.model
import comfy.ldm.omnigen.omnigen2
import comfy.ldm.qwen_image.model
import comfy.ldm.kandinsky5.model
import comfy.ldm.anima.model

import comfy.model_management
import comfy.patcher_extension
@ -1147,6 +1148,27 @@ class CosmosPredict2(BaseModel):
        sigma = (sigma / (sigma + 1))
        return latent_image / (1.0 - sigma)

class Anima(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.anima.model.Anima)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn", None)
        t5xxl_ids = kwargs.get("t5xxl_ids", None)
        t5xxl_weights = kwargs.get("t5xxl_weights", None)
        device = kwargs["device"]
        if cross_attn is not None:
            if t5xxl_ids is not None:
                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.unsqueeze(0).to(device=device))
                if t5xxl_weights is not None:
                    cross_attn *= t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn)

            if cross_attn.shape[1] < 512:
                cross_attn = torch.nn.functional.pad(cross_attn, (0, 0, 0, 512 - cross_attn.shape[1]))
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        return out

class Lumina2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)
@ -550,6 +550,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
    if '{}blocks.0.mlp.layer1.weight'.format(key_prefix) in state_dict_keys: # Cosmos predict2
        dit_config = {}
        dit_config["image_model"] = "cosmos_predict2"
        if "{}llm_adapter.blocks.0.cross_attn.q_proj.weight".format(key_prefix) in state_dict_keys:
            dit_config["image_model"] = "anima"
        dit_config["max_img_h"] = 240
        dit_config["max_img_w"] = 240
        dit_config["max_frames"] = 128
@ -57,6 +57,7 @@ import comfy.text_encoders.ovis
import comfy.text_encoders.kandinsky5
import comfy.text_encoders.jina_clip_2
import comfy.text_encoders.newbie
import comfy.text_encoders.anima

import comfy.model_patcher
import comfy.lora
@ -1048,6 +1049,7 @@ class TEModel(Enum):
    GEMMA_3_12B = 18
    JINA_CLIP_2 = 19
    QWEN3_8B = 20
    QWEN3_06B = 21


def detect_te_model(sd):
@ -1093,6 +1095,8 @@ def detect_te_model(sd):
            return TEModel.QWEN3_2B
        elif weight.shape[0] == 4096:
            return TEModel.QWEN3_8B
        elif weight.shape[0] == 1024:
            return TEModel.QWEN3_06B
    if weight.shape[0] == 5120:
        if "model.layers.39.post_attention_layernorm.weight" in sd:
            return TEModel.MISTRAL3_24B
@ -1233,6 +1237,9 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
        elif te_model == TEModel.JINA_CLIP_2:
            clip_target.clip = comfy.text_encoders.jina_clip_2.JinaClip2TextModelWrapper
            clip_target.tokenizer = comfy.text_encoders.jina_clip_2.JinaClip2TokenizerWrapper
        elif te_model == TEModel.QWEN3_06B:
            clip_target.clip = comfy.text_encoders.anima.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.anima.AnimaTokenizer
        else:
            # clip_l
            if clip_type == CLIPType.SD3:
@ -23,6 +23,7 @@ import comfy.text_encoders.qwen_image
import comfy.text_encoders.hunyuan_image
import comfy.text_encoders.kandinsky5
import comfy.text_encoders.z_image
import comfy.text_encoders.anima

from . import supported_models_base
from . import latent_formats
@ -992,6 +993,36 @@ class CosmosT2IPredict2(supported_models_base.BASE):
        t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
        return supported_models_base.ClipTarget(comfy.text_encoders.cosmos.CosmosT5Tokenizer, comfy.text_encoders.cosmos.te(**t5_detect))

class Anima(supported_models_base.BASE):
    unet_config = {
        "image_model": "anima",
    }

    sampling_settings = {
        "multiplier": 1.0,
        "shift": 3.0,
    }

    unet_extra_config = {}
    latent_format = latent_formats.Wan21

    memory_usage_factor = 1.0

    supported_inference_dtypes = [torch.bfloat16, torch.float32]

    def __init__(self, unet_config):
        super().__init__(unet_config)
        self.memory_usage_factor = (unet_config.get("model_channels", 2048) / 2048) * 0.95

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.Anima(self, device=device)
        return out

    def clip_target(self, state_dict={}):
        pref = self.text_encoder_key_prefix[0]
        detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3_06b.transformer.".format(pref))
        return supported_models_base.ClipTarget(comfy.text_encoders.anima.AnimaTokenizer, comfy.text_encoders.anima.te(**detect))

class CosmosI2VPredict2(CosmosT2IPredict2):
    unet_config = {
        "image_model": "cosmos_predict2",
@ -1551,6 +1582,6 @@ class Kandinsky5Image(Kandinsky5):
        return supported_models_base.ClipTarget(comfy.text_encoders.kandinsky5.Kandinsky5TokenizerImage, comfy.text_encoders.kandinsky5.te(**hunyuan_detect))

models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5]
models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima]
models += [SVD_img2vid]
comfy/text_encoders/anima.py (new file, 61 lines)
@ -0,0 +1,61 @@
from transformers import Qwen2Tokenizer, T5TokenizerFast
import comfy.text_encoders.llama
from comfy import sd1_clip
import os
import torch


class Qwen3Tokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer")
        super().__init__(tokenizer_path, pad_with_end=False, embedding_size=1024, embedding_key='qwen3_06b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data)

class T5XXLTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
        super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_data=tokenizer_data)

class AnimaTokenizer:
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        self.qwen3_06b = Qwen3Tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
        self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)

    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
        out = {}
        qwen_ids = self.qwen3_06b.tokenize_with_weights(text, return_word_ids, **kwargs)
        out["qwen3_06b"] = [[(token, 1.0) for token, _ in inner_list] for inner_list in qwen_ids] # Set weights to 1.0
        out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids, **kwargs)
        return out

    def untokenize(self, token_weight_pair):
        return self.t5xxl.untokenize(token_weight_pair)

    def state_dict(self):
        return {}


class Qwen3_06BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_06B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)


class AnimaTEModel(sd1_clip.SD1ClipModel):
    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(device=device, dtype=dtype, name="qwen3_06b", clip_model=Qwen3_06BModel, model_options=model_options)

    def encode_token_weights(self, token_weight_pairs):
        out = super().encode_token_weights(token_weight_pairs)
        out[2]["t5xxl_ids"] = torch.tensor(list(map(lambda a: a[0], token_weight_pairs["t5xxl"][0])), dtype=torch.int)
        out[2]["t5xxl_weights"] = torch.tensor(list(map(lambda a: a[1], token_weight_pairs["t5xxl"][0])))
        return out

def te(dtype_llama=None, llama_quantization_metadata=None):
    class AnimaTEModel_(AnimaTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
            if dtype_llama is not None:
                dtype = dtype_llama
            if llama_quantization_metadata is not None:
                model_options = model_options.copy()
                model_options["quantization_metadata"] = llama_quantization_metadata
            super().__init__(device=device, dtype=dtype, model_options=model_options)
    return AnimaTEModel_
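The dual tokenizer above emits Qwen3 ids (with weights forced to 1.0) plus the raw T5-XXL ids and prompt weights, which `encode_token_weights` then forwards to the model as `t5xxl_ids`/`t5xxl_weights`. A purely illustrative sketch of the returned structure; the token ids here are made up, real values come from the Qwen2/T5 tokenizers:

```python
# AnimaTokenizer.tokenize_with_weights("a cat") returns roughly this shape:
tokens = {
    "qwen3_06b": [[(64, 1.0), (8251, 1.0)]],   # Qwen3 ids, weights pinned to 1.0
    "t5xxl":     [[(3, 1.0), (1712, 1.2)]],    # T5-XXL ids keep the prompt weights
}
```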
@ -57,6 +57,18 @@ else:
    logging.warning("Warning, you are using an old pytorch version and some ckpt/pt files might be loaded unsafely. Upgrading to 2.4 or above is recommended as older versions of pytorch are no longer supported.")

def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
    # Try GDS loading first if available and device is GPU
    if device is not None and device.type == 'cuda':
        try:
            from . import gds_loader
            gds_result = gds_loader.load_torch_file_gds(ckpt, safe_load=safe_load, device=device)
            if return_metadata:
                # For GDS, we return empty metadata for now (can be enhanced)
                return (gds_result, {})
            return gds_result
        except Exception as e:
            logging.debug(f"GDS loading failed, using fallback: {e}")

    if device is None:
        device = torch.device("cpu")
    metadata = None
@ -1249,6 +1249,7 @@ class NodeInfoV1:
    experimental: bool=None
    api_node: bool=None
    price_badge: dict | None = None
    search_aliases: list[str]=None

@dataclass
class NodeInfoV3:
@ -1346,6 +1347,8 @@ class Schema:
    hidden: list[Hidden] = field(default_factory=list)
    description: str=""
    """Node description, shown as a tooltip when hovering over the node."""
    search_aliases: list[str] = field(default_factory=list)
    """Alternative names for search. Useful for synonyms, abbreviations, or old names after renaming."""
    is_input_list: bool = False
    """A flag indicating if this node implements the additional code necessary to deal with OUTPUT_IS_LIST nodes.
@ -1483,6 +1486,7 @@ class Schema:
            api_node=self.is_api_node,
            python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes"),
            price_badge=self.price_badge.as_dict(self.inputs) if self.price_badge is not None else None,
            search_aliases=self.search_aliases if self.search_aliases else None,
        )
        return info
comfy_extras/nodes_gds.py (new file, 293 lines)
@ -0,0 +1,293 @@
# copyright 2025 Maifee Ul Asad @ github.com/maifeeulasad
# copyright under GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007

"""
Enhanced model loading nodes with GPUDirect Storage support
"""

import logging
import time
import asyncio
from typing import Optional, Dict, Any

import torch
import folder_paths
import comfy.sd
import comfy.utils
from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict


class CheckpointLoaderGDS(ComfyNodeABC):
    """
    Enhanced checkpoint loader with GPUDirect Storage support
    Provides direct SSD-to-GPU loading and prefetching capabilities
    """

    @classmethod
    def INPUT_TYPES(s) -> InputTypeDict:
        return {
            "required": {
                "ckpt_name": (folder_paths.get_filename_list("checkpoints"), {
                    "tooltip": "The name of the checkpoint (model) to load with GDS optimization."
                }),
            },
            "optional": {
                "prefetch": ("BOOLEAN", {
                    "default": False,
                    "tooltip": "Prefetch model to GPU cache for faster loading."
                }),
                "use_gds": ("BOOLEAN", {
                    "default": True,
                    "tooltip": "Use GPUDirect Storage if available."
                }),
                "target_device": (["auto", "cuda:0", "cuda:1", "cuda:2", "cuda:3", "cpu"], {
                    "default": "auto",
                    "tooltip": "Target device for model loading."
                })
            }
        }

    RETURN_TYPES = ("MODEL", "CLIP", "VAE", "STRING")
    RETURN_NAMES = ("model", "clip", "vae", "load_info")
    OUTPUT_TOOLTIPS = (
        "The model used for denoising latents.",
        "The CLIP model used for encoding text prompts.",
        "The VAE model used for encoding and decoding images to and from latent space.",
        "Loading information and statistics."
    )
    FUNCTION = "load_checkpoint_gds"
    CATEGORY = "loaders/advanced"
    DESCRIPTION = "Enhanced checkpoint loader with GPUDirect Storage support for direct SSD-to-GPU loading."
    EXPERIMENTAL = True

    def load_checkpoint_gds(self, ckpt_name: str, prefetch: bool = False, use_gds: bool = True, target_device: str = "auto"):
        start_time = time.time()

        ckpt_path = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

        # Determine target device
        if target_device == "auto":
            device = None  # Let the system decide
        elif target_device == "cpu":
            device = torch.device("cpu")
        else:
            device = torch.device(target_device)

        load_info = {
            "file": ckpt_name,
            "path": ckpt_path,
            "target_device": str(device) if device else "auto",
            "gds_enabled": use_gds,
            "prefetch_used": prefetch
        }

        try:
            # Prefetch if requested
            if prefetch and use_gds:
                try:
                    from comfy.gds_loader import prefetch_model_gds
                    prefetch_success = prefetch_model_gds(ckpt_path)
                    load_info["prefetch_success"] = prefetch_success
                    if prefetch_success:
                        logging.info(f"Prefetched {ckpt_name} to GPU cache")
                except Exception as e:
                    logging.warning(f"Prefetch failed for {ckpt_name}: {e}")
                    load_info["prefetch_error"] = str(e)

            # Load checkpoint with potential GDS optimization
            if use_gds and device and device.type == 'cuda':
                try:
                    from comfy.gds_loader import get_gds_instance
                    gds = get_gds_instance()

                    # Check if GDS should be used for this file
                    if gds._should_use_gds(ckpt_path):
                        load_info["loader_used"] = "GDS"
                        logging.info(f"Loading {ckpt_name} with GDS")
                    else:
                        load_info["loader_used"] = "Standard"
                        logging.info(f"Loading {ckpt_name} with standard method (file too small for GDS)")

                except Exception as e:
                    logging.warning(f"GDS check failed, using standard loading: {e}")
                    load_info["loader_used"] = "Standard (GDS failed)"
            else:
                load_info["loader_used"] = "Standard"

            # Load the actual checkpoint
            out = comfy.sd.load_checkpoint_guess_config(
                ckpt_path,
                output_vae=True,
                output_clip=True,
                embedding_directory=folder_paths.get_folder_paths("embeddings")
            )

            load_time = time.time() - start_time
            load_info["load_time_seconds"] = round(load_time, 3)
            load_info["load_success"] = True

            # Format load info as string
            info_str = f"Loaded: {ckpt_name}\n"
            info_str += f"Method: {load_info['loader_used']}\n"
            info_str += f"Time: {load_info['load_time_seconds']}s\n"
            info_str += f"Device: {load_info['target_device']}"

            if "prefetch_success" in load_info:
                info_str += f"\nPrefetch: {'✓' if load_info['prefetch_success'] else '✗'}"

            logging.info(f"Checkpoint loaded: {ckpt_name} in {load_time:.3f}s using {load_info['loader_used']}")

            return (*out[:3], info_str)

        except Exception as e:
            load_info["load_success"] = False
            load_info["error"] = str(e)
            error_str = f"Failed to load: {ckpt_name}\nError: {str(e)}"
            logging.error(f"Checkpoint loading failed: {e}")
            raise RuntimeError(error_str)


class ModelPrefetcher(ComfyNodeABC):
    """
    Node for prefetching models to GPU cache
    """

    @classmethod
    def INPUT_TYPES(s) -> InputTypeDict:
        return {
            "required": {
                "checkpoint_names": ("STRING", {
                    "multiline": True,
                    "default": "",
                    "tooltip": "List of checkpoint names to prefetch (one per line)."
                }),
                "prefetch_enabled": ("BOOLEAN", {
                    "default": True,
                    "tooltip": "Enable/disable prefetching."
                })
            }
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("prefetch_report",)
    OUTPUT_TOOLTIPS = ("Report of prefetch operations.",)
    FUNCTION = "prefetch_models"
    CATEGORY = "loaders/advanced"
    DESCRIPTION = "Prefetch multiple models to GPU cache for faster loading."
    OUTPUT_NODE = True

    def prefetch_models(self, checkpoint_names: str, prefetch_enabled: bool = True):
        if not prefetch_enabled:
            return ("Prefetching disabled",)

        # Parse checkpoint names
        names = [name.strip() for name in checkpoint_names.split('\n') if name.strip()]

        if not names:
            return ("No checkpoints specified for prefetching",)

        try:
            from comfy.gds_loader import prefetch_model_gds
        except ImportError:
            return ("GDS not available for prefetching",)

        results = []
        successful_prefetches = 0

        for name in names:
            try:
                ckpt_path = folder_paths.get_full_path_or_raise("checkpoints", name)
                success = prefetch_model_gds(ckpt_path)

                if success:
                    results.append(f"✓ {name}")
                    successful_prefetches += 1
                else:
                    results.append(f"✗ {name} (prefetch failed)")

            except Exception as e:
                results.append(f"✗ {name} (error: {str(e)[:50]})")

        report = f"Prefetch Report ({successful_prefetches}/{len(names)} successful):\n"
        report += "\n".join(results)

        return (report,)


class GDSStats(ComfyNodeABC):
    """
    Node for displaying GDS statistics
    """

    @classmethod
    def INPUT_TYPES(s) -> InputTypeDict:
        return {
            "required": {
                "refresh": ("BOOLEAN", {
                    "default": False,
                    "tooltip": "Refresh statistics."
                })
            }
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("stats_report",)
    OUTPUT_TOOLTIPS = ("GDS statistics and performance report.",)
    FUNCTION = "get_stats"
    CATEGORY = "utils/advanced"
    DESCRIPTION = "Display GPUDirect Storage statistics and performance metrics."
    OUTPUT_NODE = True

    def get_stats(self, refresh: bool = False):
        try:
            from comfy.gds_loader import get_gds_stats
            stats = get_gds_stats()

            report = "=== GPUDirect Storage Statistics ===\n\n"

            # Availability
            report += f"GDS Available: {'✓' if stats['gds_available'] else '✗'}\n"

            # Usage statistics
            report += f"Total Loads: {stats['total_loads']}\n"
            report += f"GDS Loads: {stats['gds_loads']} ({stats['gds_usage_percent']:.1f}%)\n"
            report += f"Fallback Loads: {stats['fallback_loads']}\n\n"

            # Performance metrics
            if stats['total_bytes_gds'] > 0:
                gb_transferred = stats['total_bytes_gds'] / (1024**3)
                report += f"Data Transferred: {gb_transferred:.2f} GB\n"
                report += f"Average Bandwidth: {stats['avg_bandwidth_gbps']:.2f} GB/s\n"
                report += f"Total GDS Time: {stats['total_time_gds']:.2f}s\n\n"

            # Configuration
            config = stats.get('config', {})
            if config:
                report += "Configuration:\n"
                report += f"- Enabled: {config.get('enabled', 'Unknown')}\n"
                report += f"- Min File Size: {config.get('min_file_size_mb', 'Unknown')} MB\n"
                report += f"- Chunk Size: {config.get('chunk_size_mb', 'Unknown')} MB\n"
                report += f"- Max Streams: {config.get('max_concurrent_streams', 'Unknown')}\n"
                report += f"- Prefetch: {config.get('prefetch_enabled', 'Unknown')}\n"
                report += f"- Fallback: {config.get('fallback_to_cpu', 'Unknown')}\n"

            return (report,)

        except ImportError:
            return ("GDS module not available",)
        except Exception as e:
            return (f"Error retrieving GDS stats: {str(e)}",)


# Node mappings
NODE_CLASS_MAPPINGS = {
    "CheckpointLoaderGDS": CheckpointLoaderGDS,
    "ModelPrefetcher": ModelPrefetcher,
    "GDSStats": GDSStats,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "CheckpointLoaderGDS": "Load Checkpoint (GDS)",
    "ModelPrefetcher": "Model Prefetcher",
    "GDSStats": "GDS Statistics",
}
@ -550,6 +550,7 @@ class BatchImagesNode(io.ComfyNode):
            node_id="BatchImagesNode",
            display_name="Batch Images",
            category="image",
            search_aliases=["batch", "image batch", "batch images", "combine images", "merge images", "stack images"],
            inputs=[
                io.Autogrow.Input("images", template=autogrow_template)
            ],
@ -16,6 +16,7 @@ class PreviewAny():
    OUTPUT_NODE = True

    CATEGORY = "utils"
    SEARCH_ALIASES = ["preview", "show", "display", "view", "show text", "display text", "preview text", "show output", "inspect", "debug"]

    def main(self, source=None):
        value = 'None'
@ -11,6 +11,7 @@ class StringConcatenate(io.ComfyNode):
            node_id="StringConcatenate",
            display_name="Concatenate",
            category="utils/string",
            search_aliases=["text concat", "join text", "merge text", "combine strings", "concat", "concatenate", "append text", "combine text", "string"],
            inputs=[
                io.String.Input("string_a", multiline=True),
                io.String.Input("string_b", multiline=True),
@ -53,6 +53,7 @@ class ImageUpscaleWithModel(io.ComfyNode):
            node_id="ImageUpscaleWithModel",
            display_name="Upscale Image (using Model)",
            category="image/upscaling",
            search_aliases=["upscale", "upscaler", "upsc", "enlarge image", "super resolution", "hires", "superres", "increase resolution"],
            inputs=[
                io.UpscaleModel.Input("upscale_model"),
                io.Image.Input("image"),
main.py (29 lines changed)
@ -184,6 +184,35 @@ import comfyui_version
import app.logger
import hook_breaker_ac10a0

# Initialize GPUDirect Storage if enabled
def init_gds():
    """Initialize GPUDirect Storage based on CLI arguments"""
    if hasattr(args, 'disable_gds') and args.disable_gds:
        logging.info("GDS explicitly disabled via --disable-gds")
        return

    if not hasattr(args, 'enable_gds') and not hasattr(args, 'gds_prefetch') and not hasattr(args, 'gds_stats'):
        # GDS not explicitly requested, use auto-detection
        return

    if hasattr(args, 'enable_gds') and args.enable_gds:
        from comfy.gds_loader import GDSConfig, init_gds as gds_init

        config = GDSConfig(
            enabled=getattr(args, 'enable_gds', False) or getattr(args, 'gds_prefetch', False),
            min_file_size_mb=getattr(args, 'gds_min_file_size', 100),
            chunk_size_mb=getattr(args, 'gds_chunk_size', 64),
            max_concurrent_streams=getattr(args, 'gds_streams', 4),
            prefetch_enabled=getattr(args, 'gds_prefetch', True),
            fallback_to_cpu=not getattr(args, 'gds_no_fallback', False),
            show_stats=getattr(args, 'gds_stats', False)
        )

        gds_init(config)

# Initialize GDS
init_gds()

def cuda_malloc_warning():
    device = comfy.model_management.get_torch_device()
    device_name = comfy.model_management.get_torch_device_name(device)
nodes.py (16 lines changed)
@ -70,6 +70,7 @@ class CLIPTextEncode(ComfyNodeABC):

    CATEGORY = "conditioning"
    DESCRIPTION = "Encodes a text prompt using a CLIP model into an embedding that can be used to guide the diffusion model towards generating specific images."
    SEARCH_ALIASES = ["text", "prompt", "text prompt", "positive prompt", "negative prompt", "encode text", "text encoder", "encode prompt"]

    def encode(self, clip, text):
        if clip is None:
@ -86,6 +87,7 @@ class ConditioningCombine:
    FUNCTION = "combine"

    CATEGORY = "conditioning"
    SEARCH_ALIASES = ["combine", "merge conditioning", "combine prompts", "merge prompts", "mix prompts", "add prompt"]

    def combine(self, conditioning_1, conditioning_2):
        return (conditioning_1 + conditioning_2, )
@ -294,6 +296,7 @@ class VAEDecode:

    CATEGORY = "latent"
    DESCRIPTION = "Decodes latent images back into pixel space images."
    SEARCH_ALIASES = ["decode", "decode latent", "latent to image", "render latent"]

    def decode(self, vae, samples):
        latent = samples["samples"]
@ -346,6 +349,7 @@ class VAEEncode:
    FUNCTION = "encode"

    CATEGORY = "latent"
    SEARCH_ALIASES = ["encode", "encode image", "image to latent"]

    def encode(self, vae, pixels):
        t = vae.encode(pixels)
@ -581,6 +585,7 @@ class CheckpointLoaderSimple:

    CATEGORY = "loaders"
    DESCRIPTION = "Loads a diffusion model checkpoint, diffusion models are used to denoise latents."
    SEARCH_ALIASES = ["load model", "checkpoint", "model loader", "load checkpoint", "ckpt", "model"]

    def load_checkpoint(self, ckpt_name):
        ckpt_path = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)
@ -667,6 +672,7 @@ class LoraLoader:

    CATEGORY = "loaders"
    DESCRIPTION = "LoRAs are used to modify diffusion and CLIP models, altering the way in which latents are denoised such as applying styles. Multiple LoRA nodes can be linked together."
    SEARCH_ALIASES = ["lora", "load lora", "apply lora", "lora loader", "lora model"]

    def load_lora(self, model, clip, lora_name, strength_model, strength_clip):
        if strength_model == 0 and strength_clip == 0:
@ -814,6 +820,7 @@ class ControlNetLoader:
    FUNCTION = "load_controlnet"

    CATEGORY = "loaders"
    SEARCH_ALIASES = ["controlnet", "control net", "cn", "load controlnet", "controlnet loader"]

    def load_controlnet(self, control_net_name):
        controlnet_path = folder_paths.get_full_path_or_raise("controlnet", control_net_name)
@ -890,6 +897,7 @@ class ControlNetApplyAdvanced:
    FUNCTION = "apply_controlnet"

    CATEGORY = "conditioning/controlnet"
    SEARCH_ALIASES = ["controlnet", "apply controlnet", "use controlnet", "control net"]

    def apply_controlnet(self, positive, negative, control_net, image, strength, start_percent, end_percent, vae=None, extra_concat=[]):
        if strength == 0:
@ -1200,6 +1208,7 @@ class EmptyLatentImage:

    CATEGORY = "latent"
    DESCRIPTION = "Create a new batch of empty latent images to be denoised via sampling."
    SEARCH_ALIASES = ["empty", "empty latent", "new latent", "create latent", "blank latent", "blank"]

    def generate(self, width, height, batch_size=1):
        latent = torch.zeros([batch_size, 4, height // 8, width // 8], device=self.device)
@ -1540,6 +1549,7 @@ class KSampler:

    CATEGORY = "sampling"
    DESCRIPTION = "Uses the provided model, positive and negative conditioning to denoise the latent image."
    SEARCH_ALIASES = ["sampler", "sample", "generate", "denoise", "diffuse", "txt2img", "img2img"]

    def sample(self, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0):
        return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise)
@ -1604,6 +1614,7 @@ class SaveImage:

    CATEGORY = "image"
    DESCRIPTION = "Saves the input images to your ComfyUI output directory."
    SEARCH_ALIASES = ["save", "save image", "export image", "output image", "write image", "download"]

    def save_images(self, images, filename_prefix="ComfyUI", prompt=None, extra_pnginfo=None):
        filename_prefix += self.prefix_append
@ -1640,6 +1651,8 @@ class PreviewImage(SaveImage):
        self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for x in range(5))
        self.compress_level = 1

    SEARCH_ALIASES = ["preview", "preview image", "show image", "view image", "display image", "image viewer"]

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
@ -1658,6 +1671,7 @@ class LoadImage:
                }

    CATEGORY = "image"
    SEARCH_ALIASES = ["load image", "open image", "import image", "image input", "upload image", "read image", "image loader"]

    RETURN_TYPES = ("IMAGE", "MASK")
    FUNCTION = "load_image"
@ -1810,6 +1824,7 @@ class ImageScale:
    FUNCTION = "upscale"

    CATEGORY = "image/upscaling"
    SEARCH_ALIASES = ["resize", "resize image", "scale image", "image resize", "zoom", "zoom in", "change size"]

    def upscale(self, image, upscale_method, width, height, crop):
        if width == 0 and height == 0:
@ -2367,6 +2382,7 @@ async def init_builtin_extra_nodes():
        "nodes_model_patch.py",
        "nodes_easycache.py",
        "nodes_audio_encoder.py",
        "nodes_gds.py",
        "nodes_rope.py",
        "nodes_logic.py",
        "nodes_nop.py",
@ -27,4 +27,4 @@ comfy-kitchen>=0.2.7
kornia>=0.7.1
spandrel
pydantic~=2.0
pydantic-settings~=2.0
pydantic-settings~=2.0