mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-13 01:30:32 +08:00
Compare commits
2 Commits
e860732dba
...
61235fc35a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
61235fc35a | ||
|
|
93510fde17 |
@@ -337,7 +337,13 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape
|
||||
|
||||
|
||||
if mem_required > mem_free_total:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
if mem_free_total <= 0:
|
||||
# Backend (e.g. DirectML) cannot report free VRAM — use max split as a safe fallback.
|
||||
# Using 64 slices keeps individual tile memory tiny regardless of resolution.
|
||||
# See: github.com/comfyanonymous/ComfyUI/issues/1518
|
||||
steps = 64
|
||||
else:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
# print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB "
|
||||
# f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}")
|
||||
|
||||
|
||||
@@ -243,7 +243,12 @@ def slice_attention(q, k, v):
|
||||
steps = 1
|
||||
|
||||
if mem_required > mem_free_total:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
if mem_free_total <= 0:
|
||||
# Backend (e.g. DirectML) cannot report free VRAM — use max split as a safe fallback.
|
||||
# See: github.com/comfyanonymous/ComfyUI/issues/1518
|
||||
steps = 64
|
||||
else:
|
||||
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
|
||||
|
||||
while True:
|
||||
try:
|
||||
|
||||
@@ -544,6 +544,11 @@ def module_mmap_residency(module, free=False):
|
||||
for k in sd:
|
||||
t = sd[k]
|
||||
module_mem += t.nbytes
|
||||
# DirectML tensors (device.type == 'privateuseone') are backed by OpaqueTensorImpl
|
||||
# and do not expose host storage. Mmap tracking is meaningless for GPU-side tensors;
|
||||
# skip entirely. See: github.com/Comfy-Org/ComfyUI/issues/8347
|
||||
if hasattr(t, 'device') and t.device.type == 'privateuseone':
|
||||
continue
|
||||
storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
|
||||
if not getattr(storage, "_comfy_tensor_mmap_touched", False):
|
||||
continue
|
||||
@@ -1328,6 +1333,11 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
|
||||
continue
|
||||
if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
|
||||
continue
|
||||
# DirectML tensors are backed by OpaqueTensorImpl — there is no host storage to mark.
|
||||
# Skip mmap tracking and perform the copy directly.
|
||||
if hasattr(tensor, 'device') and tensor.device.type == 'privateuseone':
|
||||
dest_view.copy_(tensor, non_blocking=non_blocking)
|
||||
continue
|
||||
storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
|
||||
if hasattr(storage, "_comfy_tensor_mmap_touched"):
|
||||
storage._comfy_tensor_mmap_touched = True
|
||||
|
||||
Loading…
Reference in New Issue
Block a user