mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-15 19:47:24 +08:00
Merge 61235fc35a into b112f68681
This commit is contained in:
commit
0fe12f69ed
@ -4,7 +4,10 @@ import comfy.model_management
|
||||
import comfy.ops
|
||||
import comfy.utils
|
||||
import logging
|
||||
import torchaudio
|
||||
try:
|
||||
import torchaudio
|
||||
except (ImportError, OSError):
|
||||
torchaudio = None
|
||||
|
||||
|
||||
class AudioEncoderModel():
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchaudio
|
||||
try:
|
||||
import torchaudio
|
||||
except (ImportError, OSError):
|
||||
torchaudio = None
|
||||
from typing import Optional
|
||||
from comfy.ldm.modules.attention import optimized_attention_masked
|
||||
import comfy.ops
|
||||
|
||||
@ -2,7 +2,10 @@ import json
|
||||
from dataclasses import dataclass
|
||||
import math
|
||||
import torch
|
||||
import torchaudio
|
||||
try:
|
||||
import torchaudio
|
||||
except (ImportError, OSError):
|
||||
torchaudio = None
|
||||
|
||||
from comfy.ldm.mmaudio.vae.distributions import DiagonalGaussianDistribution
|
||||
from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier
|
||||
|
||||
@ -337,7 +337,13 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape
|
||||
|
||||
|
||||
if mem_required > mem_free_total:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
if mem_free_total <= 0:
|
||||
# Backend (e.g. DirectML) cannot report free VRAM — use max split as a safe fallback.
|
||||
# 64 slices keeps individual tile memory tiny regardless of resolution.
|
||||
# See: github.com/comfyanonymous/ComfyUI/issues/1518
|
||||
steps = 64
|
||||
else:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
# print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB "
|
||||
# f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}")
|
||||
|
||||
|
||||
@ -243,7 +243,12 @@ def slice_attention(q, k, v):
|
||||
steps = 1
|
||||
|
||||
if mem_required > mem_free_total:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
if mem_free_total <= 0:
|
||||
# Backend (e.g. DirectML) cannot report free VRAM — use max split as a safe fallback.
|
||||
# See: github.com/comfyanonymous/ComfyUI/issues/1518
|
||||
steps = 64
|
||||
else:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
|
||||
while True:
|
||||
try:
|
||||
|
||||
@ -101,7 +101,7 @@ if args.deterministic:
|
||||
|
||||
directml_enabled = False
|
||||
if args.directml is not None:
|
||||
logging.warning("WARNING: torch-directml barely works, is very slow, has not been updated in over 1 year and might be removed soon, please don't use it, there are better options.")
|
||||
logging.info("DirectML backend active (AMD/Intel GPU on Windows, no CUDA/ROCm required).")
|
||||
import torch_directml
|
||||
directml_enabled = True
|
||||
device_index = args.directml
|
||||
@ -213,7 +213,40 @@ def get_total_memory(dev=None, torch_total_too=False):
|
||||
mem_total_torch = mem_total
|
||||
else:
|
||||
if directml_enabled:
|
||||
mem_total = 1024 * 1024 * 1024 #TODO
|
||||
# Query real VRAM from Windows registry (qwMemorySize is 64-bit, AdapterRAM caps at 4GB)
|
||||
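# COMFYUI_DIRECTML_VRAM_MB (a value in MB), when set to a positive integer, takes precedence over the registry lookup; a 6GB default applies if neither yields a value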
|
||||
_dml_vram = 0
|
||||
try:
|
||||
_override = os.environ.get("COMFYUI_DIRECTML_VRAM_MB")
|
||||
if _override:
|
||||
_dml_vram = int(_override) * 1024 * 1024
|
||||
except Exception:
|
||||
pass
|
||||
if _dml_vram <= 0:
|
||||
try:
|
||||
import winreg as _winreg
|
||||
_base = r"SYSTEM\CurrentControlSet\Control\Class\{4d36e968-e325-11ce-bfc1-08002be10318}"
|
||||
with _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, _base) as _hbase:
|
||||
_i = 0
|
||||
while True:
|
||||
try:
|
||||
_sub = _winreg.EnumKey(_hbase, _i)
|
||||
_i += 1
|
||||
try:
|
||||
with _winreg.OpenKey(_hbase, _sub) as _hdev:
|
||||
_mem, _ = _winreg.QueryValueEx(_hdev, "HardwareInformation.qwMemorySize")
|
||||
if isinstance(_mem, int) and _mem > 128 * 1024 * 1024:
|
||||
_dml_vram = _mem
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
except OSError:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
if _dml_vram <= 0:
|
||||
_dml_vram = 6 * 1024 * 1024 * 1024 # 6GB safe default for modern AMD cards
|
||||
mem_total = _dml_vram
|
||||
mem_total_torch = mem_total
|
||||
elif is_intel_xpu():
|
||||
stats = torch.xpu.memory_stats(dev)
|
||||
@ -511,6 +544,11 @@ def module_mmap_residency(module, free=False):
|
||||
for k in sd:
|
||||
t = sd[k]
|
||||
module_mem += t.nbytes
|
||||
# DirectML tensors (device.type == 'privateuseone') are backed by OpaqueTensorImpl
|
||||
# and do not expose host storage. Mmap tracking is meaningless for GPU-side tensors;
|
||||
# skip entirely. See: github.com/Comfy-Org/ComfyUI/issues/8347
|
||||
if hasattr(t, 'device') and t.device.type == 'privateuseone':
|
||||
continue
|
||||
storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
|
||||
if not getattr(storage, "_comfy_tensor_mmap_touched", False):
|
||||
continue
|
||||
@ -1295,6 +1333,11 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
|
||||
continue
|
||||
if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
|
||||
continue
|
||||
# DirectML tensors are OpaqueTensorImpl — no host storage to mark.
|
||||
# Skip mmap tracking and perform the copy directly.
|
||||
if hasattr(tensor, 'device') and tensor.device.type == 'privateuseone':
|
||||
dest_view.copy_(tensor, non_blocking=non_blocking)
|
||||
continue
|
||||
storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
|
||||
if hasattr(storage, "_comfy_tensor_mmap_touched"):
|
||||
storage._comfy_tensor_mmap_touched = True
|
||||
@ -1504,7 +1547,16 @@ def get_free_memory(dev=None, torch_free_too=False):
|
||||
mem_free_torch = mem_free_total
|
||||
else:
|
||||
if directml_enabled:
|
||||
mem_free_total = 1024 * 1024 * 1024 #TODO
|
||||
# gpu_memory(0) returns a list of per-tile usage fractions [0.0–1.0]
|
||||
# total_vram (module-level) is the registry-queried real VRAM in MB
|
||||
try:
|
||||
import torch_directml as _tdml
|
||||
_usage_fracs = _tdml.gpu_memory(0)
|
||||
_usage_pct = max(_usage_fracs) if _usage_fracs else 0.0
|
||||
_total = int(total_vram * 1024 * 1024)
|
||||
mem_free_total = max(0, int(_total * (1.0 - _usage_pct)))
|
||||
except Exception:
|
||||
mem_free_total = int(total_vram * 1024 * 1024)
|
||||
mem_free_torch = mem_free_total
|
||||
elif is_intel_xpu():
|
||||
stats = torch.xpu.memory_stats(dev)
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import av
|
||||
import torchaudio
|
||||
try:
|
||||
import torchaudio
|
||||
except (ImportError, OSError):
|
||||
torchaudio = None
|
||||
import torch
|
||||
import comfy.model_management
|
||||
import folder_paths
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
import nodes
|
||||
import node_helpers
|
||||
import torch
|
||||
import torchaudio
|
||||
try:
|
||||
import torchaudio
|
||||
except (ImportError, OSError):
|
||||
torchaudio = None
|
||||
import comfy.model_management
|
||||
import comfy.model_sampling
|
||||
import comfy.samplers
|
||||
|
||||
@ -2,7 +2,10 @@ import math
|
||||
import nodes
|
||||
import node_helpers
|
||||
import torch
|
||||
import torchaudio
|
||||
try:
|
||||
import torchaudio
|
||||
except (ImportError, OSError):
|
||||
torchaudio = None
|
||||
import comfy.model_management
|
||||
import comfy.utils
|
||||
import numpy as np
|
||||
|
||||
Loading…
Reference in New Issue
Block a user