diff --git a/comfy/model_management.py b/comfy/model_management.py
index 10a706793..e9af7f3a7 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -207,7 +207,7 @@ def load_model_gpu(model):
     if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
         model_size = model.model_size()
         current_free_mem = get_free_memory(torch_dev)
-        lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.2 ))
+        lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
         if model_size > (current_free_mem - (512 * 1024 * 1024)): #only switch to lowvram if really necessary
             vram_set_state = VRAMState.LOW_VRAM
 
@@ -389,7 +389,10 @@ def should_use_fp16():
 
 def soft_empty_cache():
     global xpu_available
-    if xpu_available:
+    global vram_state
+    if vram_state == VRAMState.MPS:
+        torch.mps.empty_cache()
+    elif xpu_available:
         torch.xpu.empty_cache()
     elif torch.cuda.is_available():
         if torch.version.cuda: #This seems to make things worse on ROCm so I only do it for cuda