diff --git a/comfy/model_management.py b/comfy/model_management.py index b1afeb715..241706925 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -281,19 +281,23 @@ def load_model_gpu(model): vram_set_state = VRAMState.LOW_VRAM real_model = model.model + patch_model_to = None if vram_set_state == VRAMState.DISABLED: pass elif vram_set_state == VRAMState.NORMAL_VRAM or vram_set_state == VRAMState.HIGH_VRAM or vram_set_state == VRAMState.SHARED: model_accelerated = False - real_model.to(torch_dev) + patch_model_to = torch_dev try: - real_model = model.patch_model() + real_model = model.patch_model(device_to=patch_model_to) except Exception as e: model.unpatch_model() unload_model() raise e + if patch_model_to is not None: + real_model.to(torch_dev) + if vram_set_state == VRAMState.NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) accelerate.dispatch_model(real_model, device_map=device_map, main_device=torch_dev) diff --git a/comfy/samplers.py b/comfy/samplers.py index 50fda016d..9eee25a92 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -248,7 +248,10 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con c['transformer_options'] = transformer_options - output = model_function(input_x, timestep_, **c).chunk(batch_chunks) + if 'model_function_wrapper' in model_options: + output = model_options['model_function_wrapper'](model_function, {"input": input_x, "timestep": timestep_, "c": c, "cond_or_uncond": cond_or_uncond}).chunk(batch_chunks) + else: + output = model_function(input_x, timestep_, **c).chunk(batch_chunks) del input_x model_management.throw_exception_if_processing_interrupted() diff --git a/comfy/sd.py b/comfy/sd.py index 7f7c06bc5..1f364dd1f 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -338,7 +338,7 @@ class ModelPatcher: sd.pop(k) return sd - def patch_model(self): + def patch_model(self, device_to=None): model_sd = self.model_state_dict() for key in self.patches: if key not in model_sd: @@ -350,7 +350,10 @@ class ModelPatcher: if key not in self.backup: self.backup[key] = weight.to(self.offload_device) - temp_weight = weight.to(torch.float32, copy=True) + if device_to is not None: + temp_weight = weight.float().to(device_to, copy=True) + else: + temp_weight = weight.to(torch.float32, copy=True) out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype) set_attr(self.model, key, out_weight) del temp_weight diff --git a/cuda_malloc.py b/cuda_malloc.py index 382432215..faee91a34 100644 --- a/cuda_malloc.py +++ b/cuda_malloc.py @@ -37,7 +37,7 @@ def get_gpu_names(): return set() def cuda_malloc_supported(): - blacklist = {"GeForce GTX 960M", "GeForce GTX 950M", "GeForce 945M", "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745"} + blacklist = {"GeForce GTX 960", "GeForce GTX 950", "GeForce 945M", "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745"} try: names = get_gpu_names() except: