From d7a0aef65033bf0fe56e521577a44fac1830b8b3 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Fri, 5 Dec 2025 21:15:21 -0800
Subject: [PATCH] Set OCL_SET_SVM_SIZE on AMD. (#11139)

---
 cuda_malloc.py | 27 +++++++++++++++++----------
 main.py        |  3 +++
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/cuda_malloc.py b/cuda_malloc.py
index 6520d5123..ee2bc4b69 100644
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@@ -63,18 +63,22 @@ def cuda_malloc_supported():
     return True
 
 
+version = ""  # torch version string; stays "" when it cannot be determined
+
+try:
+    torch_spec = importlib.util.find_spec("torch")  # locate the package without importing it
+    for folder in torch_spec.submodule_search_locations:
+        ver_file = os.path.join(folder, "version.py")
+        if os.path.isfile(ver_file):
+            spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
+            module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(module)  # executes only version.py, not the torch package
+            version = module.__version__
+except Exception:  # best-effort probe: keep "" on failure, but let KeyboardInterrupt/SystemExit propagate
+    pass
+
 if not args.cuda_malloc:
     try:
-        version = ""
-        torch_spec = importlib.util.find_spec("torch")
-        for folder in torch_spec.submodule_search_locations:
-            ver_file = os.path.join(folder, "version.py")
-            if os.path.isfile(ver_file):
-                spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
-                module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(module)
-                version = module.__version__
-
         if int(version[0]) >= 2 and "+cu" in version:  # enable by default for torch version 2.0 and up only on cuda torch
             if PerformanceFeature.AutoTune not in args.fast:  # Autotune has issues with cuda malloc
                 args.cuda_malloc = cuda_malloc_supported()
@@ -90,3 +94,6 @@ if args.cuda_malloc and not args.disable_cuda_malloc:
         env_var += ",backend:cudaMallocAsync"
 
     os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var
+
+def get_torch_version_noimport():  # accessor for the module-level `version` probed from torch's version.py
+    return str(version)  # "" when the probe above could not determine a version
diff --git a/main.py b/main.py
index 0cd815d9e..0d02a087b 100644
--- a/main.py
+++ b/main.py
@@ -167,6 +167,9 @@ if __name__ == "__main__":
             os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"
 
     import cuda_malloc
+    if "rocm" in cuda_malloc.get_torch_version_noimport():
+        os.environ['OCL_SET_SVM_SIZE'] = '262144'  # set at the request of AMD
+
 
 if 'torch' in sys.modules:
     logging.warning("WARNING: Potential Error in code: Torch already imported, torch should never be imported before this point.")