2026-06-16 04:49:35 +08:00
3 changed files with 2 additions and 23 deletions
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -337,13 +337,7 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape


    if mem_required > mem_free_total:
-        if mem_free_total <= 0:
-            # Backend (e.g. DirectML) cannot report free VRAM — use max split as a safe fallback.
-            # 64 slices keeps individual tile memory tiny regardless of resolution.
-            # See: github.com/comfyanonymous/ComfyUI/issues/1518
-            steps = 64
-        else:
-            steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
+        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
        # print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB "
        #      f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}")

--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -243,12 +243,7 @@ def slice_attention(q, k, v):
    steps = 1

    if mem_required > mem_free_total:
-        if mem_free_total <= 0:
-            # Backend (e.g. DirectML) cannot report free VRAM — use max split as safe fallback.
-            # See: github.com/comfyanonymous/ComfyUI/issues/1518
-            steps = 64
-        else:
-            steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
+        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))

    while True:
        try:
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -544,11 +544,6 @@ def module_mmap_residency(module, free=False):
    for k in sd:
        t = sd[k]
        module_mem += t.nbytes
-        # DirectML tensors (device.type == 'privateuseone') are backed by OpaqueTensorImpl
-        # and do not expose host storage. Mmap tracking is meaningless for GPU-side tensors;
-        # skip entirely. See: github.com/Comfy-Org/ComfyUI/issues/8347
-        if hasattr(t, 'device') and t.device.type == 'privateuseone':
-            continue
        storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
        if not getattr(storage, "_comfy_tensor_mmap_touched", False):
            continue
@@ -1333,11 +1328,6 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
                continue
            if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
                continue
-            # DirectML tensors are OpaqueTensorImpl — no host storage to mark.
-            # Skip mmap tracking and perform the copy directly.
-            if hasattr(tensor, 'device') and tensor.device.type == 'privateuseone':
-                dest_view.copy_(tensor, non_blocking=non_blocking)
-                continue
            storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
            if hasattr(storage, "_comfy_tensor_mmap_touched"):
                storage._comfy_tensor_mmap_touched = True