From ab9a9deff48b5780bd105dfd6d19f5f8333ef608 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 28 Apr 2023 09:03:39 -0400
Subject: [PATCH 1/5] Fix nightly CI builds. No cu121 builds for windows yet.

---
 .../update_windows/update_comfyui_and_python_dependencies.bat | 2 +-
 .github/workflows/windows_release_nightly_pytorch.yml         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat b/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat
index c345a6992..b4989534f 100755
--- a/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat
+++ b/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat
@@ -1,3 +1,3 @@
 ..\python_embeded\python.exe .\update.py ..\ComfyUI\
-..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121 -r ../ComfyUI/requirements.txt pygit2
+..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu118 -r ../ComfyUI/requirements.txt pygit2
 pause
diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml
index 4d686ded8..f23cae6d5 100644
--- a/.github/workflows/windows_release_nightly_pytorch.yml
+++ b/.github/workflows/windows_release_nightly_pytorch.yml
@@ -30,7 +30,7 @@ jobs:
         echo 'import site' >> ./python310._pth
         curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
         ./python.exe get-pip.py
-        python -m pip wheel torch torchvision torchaudio --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
+        python -m pip wheel torch torchvision torchaudio --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu118 -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
         ls ../temp_wheel_dir
         ./python.exe -s -m pip install --pre ../temp_wheel_dir/*
         sed -i '1i../ComfyUI' ./python310._pth

From 3baded9892a6ac02f57caaf68053791ec0e14c5a Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 28 Apr 2023 14:28:57 -0400
Subject: [PATCH 2/5] Basic torch_directml support. Use --directml to use it.

---
 comfy/cli_args.py         |  1 +
 comfy/model_management.py | 27 ++++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index b24054ce0..05b9c5e08 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -10,6 +10,7 @@ parser.add_argument("--output-directory", type=str, default=None, help="Set the
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.")
 parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
+parser.add_argument("--directml", action="store_true", help="Use torch-directml.")
 
 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.")
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 6e3a03530..339111c4d 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -20,6 +20,13 @@ total_vram_available_mb = -1
 accelerate_enabled = False
 xpu_available = False
 
+directml_enabled = False
+if args.directml:
+    import torch_directml
+    print("Using directml")
+    directml_enabled = True
+    # torch_directml.disable_tiled_resources(True)
+
 try:
     import torch
     try:
@@ -217,6 +224,9 @@ def unload_if_low_vram(model):
 
 def get_torch_device():
     global xpu_available
+    global directml_enabled
+    if directml_enabled:
+        return torch_directml.device()
     if vram_state == VRAMState.MPS:
         return torch.device("mps")
     if vram_state == VRAMState.CPU:
@@ -234,8 +244,14 @@ def get_autocast_device(dev):
 
 
 def xformers_enabled():
+    global xpu_available
+    global directml_enabled
     if vram_state == VRAMState.CPU:
         return False
+    if xpu_available:
+        return False
+    if directml_enabled:
+        return False
     return XFORMERS_IS_AVAILABLE
 
@@ -251,6 +267,7 @@ def pytorch_attention_enabled():
 
 def get_free_memory(dev=None, torch_free_too=False):
     global xpu_available
+    global directml_enabled
     if dev is None:
         dev = get_torch_device()
 
@@ -258,7 +275,10 @@
         mem_free_total = psutil.virtual_memory().available
         mem_free_torch = mem_free_total
     else:
-        if xpu_available:
+        if directml_enabled:
+            mem_free_total = 1024 * 1024 * 1024 #TODO
+            mem_free_torch = mem_free_total
+        elif xpu_available:
             mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev)
             mem_free_torch = mem_free_total
         else:
@@ -293,9 +313,14 @@ def mps_mode():
 
 def should_use_fp16():
     global xpu_available
+    global directml_enabled
+
     if FORCE_FP32:
         return False
 
+    if directml_enabled:
+        return False
+
     if cpu_mode() or mps_mode() or xpu_available:
         return False #TODO ?
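A quick illustration, separate from the patches themselves: patch 2 wires DirectML in through a module-level flag that is set once at startup and then consulted by get_torch_device(). The sketch below mirrors that pattern in a self-contained form. The torch_directml calls are the ones the patch uses; the try/except import guard (the patch gates the import on the --directml flag instead) and the demo tensor are illustrative assumptions.

    import torch

    # The patch imports torch_directml only when --directml is passed;
    # a try/except guard stands in for that here.
    try:
        import torch_directml
        directml_enabled = True
    except ImportError:
        directml_enabled = False

    def get_torch_device():
        # As in the patched model_management.get_torch_device():
        # DirectML takes priority, then the usual CUDA/CPU fallback.
        if directml_enabled:
            return torch_directml.device()
        if torch.cuda.is_available():
            return torch.device("cuda")
        return torch.device("cpu")

    # A tensor on the selected device behaves like any other torch tensor.
    x = torch.ones(2, 2, device=get_torch_device())
    print(x.device)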
From 0306371e54ddb7472622eb43ed2180a109be6e6b Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 28 Apr 2023 16:18:54 -0400
Subject: [PATCH 3/5] Add "Installing" link to top of readme.

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 5b6346a67..00b228497 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,8 @@ A powerful and modular stable diffusion GUI and backend.
 This ui will let you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. For some workflow examples and see what ComfyUI can do you can check out:
 ### [ComfyUI Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
 
+### [Installing](#installing)
+
 ## Features
 - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
 - Fully supports SD1.x and SD2.x

From cab80973d187903d9c415cfcc2575e4616befaa8 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 28 Apr 2023 16:19:56 -0400
Subject: [PATCH 4/5] Fix Readme.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 00b228497..3b3824714 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ A powerful and modular stable diffusion GUI and backend.
 This ui will let you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. For some workflow examples and see what ComfyUI can do you can check out:
 ### [ComfyUI Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
 
-### [Installing](#installing)
+### [Installing ComfyUI](#installing)
 
 ## Features
 - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
From 2ca934f7d4df3e4fa5a74172e5bbc1dd5e1a2ff9 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 28 Apr 2023 16:51:35 -0400
Subject: [PATCH 5/5] You can now select the device index with: --directml id

Like this for example: --directml 1
---
 comfy/cli_args.py         |  2 +-
 comfy/model_management.py | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 05b9c5e08..764427165 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -10,7 +10,7 @@ parser.add_argument("--output-directory", type=str, default=None, help="Set the
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.")
 parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
-parser.add_argument("--directml", action="store_true", help="Use torch-directml.")
+parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")
 
 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.")
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 339111c4d..9497ae7af 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -21,10 +21,15 @@ accelerate_enabled = False
 xpu_available = False
 
 directml_enabled = False
-if args.directml:
+if args.directml is not None:
     import torch_directml
-    print("Using directml")
     directml_enabled = True
+    device_index = args.directml
+    if device_index < 0:
+        directml_device = torch_directml.device()
+    else:
+        directml_device = torch_directml.device(device_index)
+    print("Using directml with device:", torch_directml.device_name(device_index))
     # torch_directml.disable_tiled_resources(True)
 
 try:
@@ -226,7 +231,8 @@ def get_torch_device():
     global xpu_available
     global directml_enabled
     if directml_enabled:
-        return torch_directml.device()
+        global directml_device
+        return directml_device
     if vram_state == VRAMState.MPS:
         return torch.device("mps")
     if vram_state == VRAMState.CPU:
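A closing illustration, separate from the patches themselves: the reworked --directml argument leans on argparse's nargs="?"/const handling to distinguish three cases — flag absent, flag given bare, and flag given with an explicit index. A minimal sketch using the same argument signature as the patched cli_args.py:

    import argparse

    parser = argparse.ArgumentParser()
    # `const` is used when --directml appears with no value;
    # the default (None) is used when the flag is absent entirely.
    parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1)

    print(parser.parse_args([]).directml)                   # None -> DirectML stays disabled
    print(parser.parse_args(["--directml"]).directml)       # -1   -> torch_directml.device() (default device)
    print(parser.parse_args(["--directml", "1"]).directml)  # 1    -> torch_directml.device(1)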