Merge branch 'comfyanonymous:master' into bugfix/extra_data
commit 9f07df828c
@@ -45,6 +45,8 @@ jobs:
 sed -i '1i../ComfyUI' ./python310._pth
 cd ..

+git clone https://github.com/comfyanonymous/taesd
+cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/

 mkdir ComfyUI_windows_portable
 mv python_embeded ComfyUI_windows_portable
@@ -37,6 +37,8 @@ jobs:
 sed -i '1i../ComfyUI' ./python311._pth
 cd ..

+git clone https://github.com/comfyanonymous/taesd
+cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/

 mkdir ComfyUI_windows_portable_nightly_pytorch
 mv python_embeded ComfyUI_windows_portable_nightly_pytorch
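Both workflow hunks above make the same change for the two portable builds: clone the taesd repository and copy its .pth files into models/vae_approx/, the folder ComfyUI uses for the tiny approximate VAE that powers fast latent previews. A minimal sanity-check sketch one could run after the copy step; the glob path follows the cp command above, and the exact weight-file names are an assumption rather than something this diff confirms:

# Hypothetical post-copy check: confirm the TAESD weights load as plain
# PyTorch state dicts (path taken from the cp command above).
import glob
import torch

for path in glob.glob("ComfyUI_copy/models/vae_approx/*.pth"):
    sd = torch.load(path, map_location="cpu")
    tensors = [t for t in sd.values() if hasattr(t, "numel")]
    print(path, len(tensors), "tensors,", sum(t.numel() for t in tensors), "parameters")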
@@ -16,11 +16,14 @@ if model_management.xformers_enabled():
     import xformers
     import xformers.ops

-# CrossAttn precision handling
-import os
-_ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32")
-
 from comfy.cli_args import args
+
+# CrossAttn precision handling
+if args.dont_upcast_attention:
+    print("disabling upcasting of attention")
+    _ATTN_PRECISION = "fp16"
+else:
+    _ATTN_PRECISION = "fp32"

 def exists(val):
     return val is not None
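This hunk replaces the ATTN_PRECISION environment variable (previously set in main.py, see the last hunk below) with a module-level _ATTN_PRECISION derived directly from the --dont-upcast-attention flag. A rough, self-contained sketch of how such a precision switch is typically applied in the attention math; it illustrates the upcast pattern only and is not the module's actual forward pass:

import torch

_ATTN_PRECISION = "fp32"  # in the patch this is derived from args.dont_upcast_attention

def attention_scores(q, k, scale):
    # With upcasting enabled, compute the q.k similarity in fp32 so the
    # logits cannot overflow fp16; otherwise keep the incoming dtype.
    if _ATTN_PRECISION == "fp32":
        q, k = q.float(), k.float()
    return torch.einsum('b i d, b j d -> b i j', q, k) * scale

q = torch.randn(2, 77, 64, dtype=torch.float16)
k = torch.randn(2, 77, 64, dtype=torch.float16)
print(attention_scores(q, k, 64 ** -0.5).dtype)  # torch.float32 while upcasting is on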
@@ -245,6 +245,8 @@ def unload_model():
            n.cpu()
        current_gpu_controlnets = []

+def minimum_inference_memory():
+    return (768 * 1024 * 1024)

 def load_model_gpu(model):
     global current_loaded_model
@@ -272,7 +274,7 @@ def load_model_gpu(model):
         model_size = model.model_size()
         current_free_mem = get_free_memory(torch_dev)
         lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
-        if model_size > (current_free_mem - (512 * 1024 * 1024)): #only switch to lowvram if really necessary
+        if model_size > (current_free_mem - minimum_inference_memory()): #only switch to lowvram if really necessary
             vram_set_state = VRAMState.LOW_VRAM

     current_loaded_model = model
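Together, the two hunks above give the "keep this much VRAM free for inference" constant a name (768 MB) and use it in place of the hard-coded 512 MB when deciding whether to fall back to low-VRAM loading. A standalone sketch of that decision with made-up numbers; in the real code the inputs come from model.model_size() and get_free_memory():

def minimum_inference_memory():
    return 768 * 1024 * 1024  # bytes reserved for activations during sampling

def should_switch_to_lowvram(model_size, current_free_mem):
    # Mirrors the patched check: only use low-VRAM mode if the model would
    # not fit alongside the inference reserve.
    return model_size > (current_free_mem - minimum_inference_memory())

free_mem = 4 * 1024**3                                     # pretend 4 GiB free
print(should_switch_to_lowvram(2 * 1024**3, free_mem))     # False: fits with headroom
print(should_switch_to_lowvram(3500 * 1024**2, free_mem))  # True: too tight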
@@ -332,19 +334,19 @@ def unload_if_low_vram(model):
     return model

 def unet_offload_device():
-    if vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.SHARED:
+    if vram_state == VRAMState.HIGH_VRAM:
         return get_torch_device()
     else:
         return torch.device("cpu")

 def text_encoder_offload_device():
-    if args.gpu_only or vram_state == VRAMState.SHARED:
+    if args.gpu_only:
         return get_torch_device()
     else:
         return torch.device("cpu")

 def text_encoder_device():
-    if args.gpu_only or vram_state == VRAMState.SHARED:
+    if args.gpu_only:
         return get_torch_device()
     elif vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.NORMAL_VRAM:
         if torch.get_num_threads() < 8: #leaving the text encoder on the CPU is faster than shifting it if the CPU is fast enough.
@@ -358,7 +360,7 @@ def vae_device():
     return get_torch_device()

 def vae_offload_device():
-    if args.gpu_only or vram_state == VRAMState.SHARED:
+    if args.gpu_only:
         return get_torch_device()
     else:
         return torch.device("cpu")
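These hunks drop the VRAMState.SHARED special case, so when idle the UNet stays on the GPU only in HIGH_VRAM mode and the text encoder and VAE stay there only with --gpu-only. A hedged sketch of how such offload helpers are normally consumed between runs; the helper body below is a simplified stand-in, not the full patched function:

import torch
import torch.nn as nn

def unet_offload_device():
    # Simplified stand-in: in NORMAL_VRAM mode the patched helper now
    # returns the CPU as the parking spot for idle weights.
    return torch.device("cpu")

# Hypothetical usage after sampling: park the model and release cached VRAM.
model = nn.Linear(4, 4)           # placeholder for the real UNet
model.to(unet_offload_device())
if torch.cuda.is_available():
    torch.cuda.empty_cache()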
@@ -458,7 +460,7 @@ def is_device_cpu(device):
            return True
    return False

-def should_use_fp16(device=None):
+def should_use_fp16(device=None, model_params=0):
     global xpu_available
     global directml_enabled

@@ -482,10 +484,27 @@ def should_use_fp16(device=None):
        return True

    props = torch.cuda.get_device_properties("cuda")
+    if props.major < 6:
+        return False
+
+    fp16_works = False
+    #FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
+    #when the model doesn't actually fit on the card
+    #TODO: actually test if GP106 and others have the same type of behavior
+    nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"]
+    for x in nvidia_10_series:
+        if x in props.name.lower():
+            fp16_works = True
+
+    if fp16_works:
+        free_model_memory = (get_free_memory() * 0.9 - minimum_inference_memory())
+        if model_params * 4 > free_model_memory:
+            return True
+
    if props.major < 7:
        return False

-    #FP32 is faster on those cards?
+    #FP16 is just broken on these cards
    nvidia_16_series = ["1660", "1650", "1630", "T500", "T550", "T600"]
    for x in nvidia_16_series:
        if x in props.name:
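The new model_params argument lets should_use_fp16() weigh model size on Pascal 10-series cards, where fp16 runs but is slower than fp32: fp16 is chosen only when the fp32 weights (about 4 bytes per parameter) would not fit into 90% of free memory minus the inference reserve. A back-of-the-envelope sketch of that branch with assumed numbers; the ~860M figure is a rough SD1.x UNet parameter count, not taken from this diff:

def minimum_inference_memory():
    return 768 * 1024 * 1024

def pascal_prefers_fp16(model_params, free_memory):
    # Mirrors the new 10-series branch: stay in fp32 unless the fp32 weights
    # (~4 bytes per parameter) would overflow the usable free memory.
    free_model_memory = free_memory * 0.9 - minimum_inference_memory()
    return model_params * 4 > free_model_memory

unet_params = 860_000_000                              # rough SD1.x UNet size
print(pascal_prefers_fp16(unet_params, 8 * 1024**3))   # False: 8 GB card keeps fp32
print(pascal_prefers_fp16(unet_params, 3 * 1024**3))   # True: 3 GB card needs fp16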
@@ -1122,6 +1122,12 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl

     return (ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)

+def calculate_parameters(sd, prefix):
+    params = 0
+    for k in sd.keys():
+        if k.startswith(prefix):
+            params += sd[k].nelement()
+    return params

 def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None):
     sd = utils.load_torch_file(ckpt_path)
@@ -1132,7 +1138,8 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     model = None
     clip_target = None

-    fp16 = model_management.should_use_fp16()
+    parameters = calculate_parameters(sd, "model.diffusion_model.")
+    fp16 = model_management.should_use_fp16(model_params=parameters)

     class WeightsLoader(torch.nn.Module):
         pass
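calculate_parameters() simply counts the elements of every tensor under the given prefix, and that count is what load_checkpoint_guess_config() now feeds into should_use_fp16(model_params=...). A toy example of the counting logic on a made-up state dict:

import torch

def calculate_parameters(sd, prefix):
    params = 0
    for k in sd.keys():
        if k.startswith(prefix):
            params += sd[k].nelement()
    return params

# Made-up checkpoint-style state dict: only model.diffusion_model.* keys count.
sd = {
    "model.diffusion_model.input_blocks.0.weight": torch.zeros(320, 4, 3, 3),
    "model.diffusion_model.input_blocks.0.bias": torch.zeros(320),
    "cond_stage_model.transformer.text_model.embeddings.weight": torch.zeros(1000, 768),
}
print(calculate_parameters(sd, "model.diffusion_model."))  # 320*4*3*3 + 320 = 11840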
main.py (4 changed lines)
@@ -14,10 +14,6 @@ if os.name == "nt":
     logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())

 if __name__ == "__main__":
-    if args.dont_upcast_attention:
-        print("disabling upcasting of attention")
-        os.environ['ATTN_PRECISION'] = "fp16"
-
     if args.cuda_device is not None:
         os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
         print("Set cuda device to:", args.cuda_device)