diff --git a/comfy/model_management.py b/comfy/model_management.py
index 10a706793..e9af7f3a7 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -207,7 +207,7 @@ def load_model_gpu(model):
     if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
         model_size = model.model_size()
         current_free_mem = get_free_memory(torch_dev)
-        lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.2 ))
+        lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
         if model_size > (current_free_mem - (512 * 1024 * 1024)): #only switch to lowvram if really necessary
             vram_set_state = VRAMState.LOW_VRAM
 
@@ -389,7 +389,10 @@ def should_use_fp16():
 
 def soft_empty_cache():
     global xpu_available
-    if xpu_available:
+    global vram_state
+    if vram_state == VRAMState.MPS:
+        torch.mps.empty_cache()
+    elif xpu_available:
         torch.xpu.empty_cache()
     elif torch.cuda.is_available():
         if torch.version.cuda: #This seems to make things worse on ROCm so I only do it for cuda