diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index 37b2a8ee3..172dd18b4 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -336,12 +336,14 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape
 
     steps = 1
 
-    if mem_free_total <= 0:
-        # DirectML doesn't expose free VRAM — assume 4GB free as a safe fallback for 6GB cards
-        mem_free_total = 4 * (1024 ** 3)
-    if mem_required > mem_free_total:
-        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
+    if mem_free_total <= 0:
+        # Backend (e.g. DirectML) cannot report free VRAM; use the maximum split as a safe fallback.
+        # 64 slices keeps each tile's memory footprint tiny regardless of resolution.
+        # See: github.com/comfyanonymous/ComfyUI/issues/1518
+        steps = 64
+    elif mem_required > mem_free_total:
+        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
 
     # print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB "
     #       f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}")
 
diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
index fcbaa074f..3aa8db1a4 100644
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -243,7 +243,12 @@ def slice_attention(q, k, v):
     steps = 1
 
     if mem_required > mem_free_total:
-        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
+        if mem_free_total <= 0:
+            # Backend (e.g. DirectML) cannot report free VRAM; use the maximum split as a safe fallback.
+            # See: github.com/comfyanonymous/ComfyUI/issues/1518
+            steps = 64
+        else:
+            steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
 
     while True:
         try:
diff --git a/comfy/model_management.py b/comfy/model_management.py
index a14627842..6ae363897 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -544,11 +544,12 @@ def module_mmap_residency(module, free=False):
     for k in sd:
         t = sd[k]
         module_mem += t.nbytes
-        try:
-            storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
-        except NotImplementedError:
-            # DirectML (AMD) tensors are opaque — no host storage to inspect; skip mmap tracking
+        # DirectML tensors (device.type == 'privateuseone') are backed by OpaqueTensorImpl
+        # and do not expose host storage. Mmap tracking is meaningless for GPU-side tensors,
+        # so skip them entirely. See: github.com/Comfy-Org/ComfyUI/issues/8347
+        if hasattr(t, 'device') and t.device.type == 'privateuseone':
             continue
+        storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
         if not getattr(storage, "_comfy_tensor_mmap_touched", False):
             continue
         mmap_touched_mem += t.nbytes
@@ -1332,12 +1333,12 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
             continue
         if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
             continue
-        try:
-            storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
-        except NotImplementedError:
-            # DirectML tensors are opaque — skip mmap marking, just copy
+        # DirectML tensors are OpaqueTensorImpl and expose no host storage to mark;
+        # skip mmap tracking and perform the copy directly.
+        if hasattr(tensor, 'device') and tensor.device.type == 'privateuseone':
             dest_view.copy_(tensor, non_blocking=non_blocking)
             continue
+        storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
         if hasattr(storage, "_comfy_tensor_mmap_touched"):
             storage._comfy_tensor_mmap_touched = True
         dest_view.copy_(tensor, non_blocking=non_blocking)
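
For reviewers: the two attention hunks share one fallback policy, restated standalone below. This is a minimal sketch under the same assumptions as the patch; `compute_split_steps` and `fallback_steps` are illustrative names, not identifiers from the codebase.

```python
import math

def compute_split_steps(mem_required: float, mem_free_total: float,
                        fallback_steps: int = 64) -> int:
    """Number of slices to split the attention matmul into."""
    if mem_free_total <= 0:
        # Backend cannot report free VRAM (e.g. DirectML): split maximally
        # instead of guessing a budget, as the removed 4GB assumption did.
        return fallback_steps
    if mem_required <= mem_free_total:
        return 1  # everything fits in a single pass
    # Smallest power of two >= mem_required / mem_free_total
    return 2 ** math.ceil(math.log2(mem_required / mem_free_total))

assert compute_split_steps(12e9, 4e9) == 4  # needs 3x the free memory -> 4 slices
assert compute_split_steps(1e9, 8e9) == 1   # fits outright -> no splitting
assert compute_split_steps(1e9, -1) == 64   # free VRAM unknown -> max split
```

The two model_management.py hunks hinge on a single device check, sketched below; `is_directml_tensor` is a hypothetical helper. Branching on `device.type` up front also keeps the broad `except NotImplementedError` out of the hot loop, where it could mask unrelated storage failures.

```python
import torch

def is_directml_tensor(t: torch.Tensor) -> bool:
    # torch-directml registers under PyTorch's PrivateUse1 dispatch key,
    # so its tensors report device.type == 'privateuseone'. They are
    # backed by OpaqueTensorImpl, and t.untyped_storage() raises
    # NotImplementedError, so mmap bookkeeping must be skipped for them.
    return hasattr(t, 'device') and t.device.type == 'privateuseone'
```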