From 8ecb5c11e9927700f4e06919c15257b1bb8ac0f0 Mon Sep 17 00:00:00 2001
From: Benjamin Berman
Date: Tue, 15 Aug 2023 18:18:10 -0700
Subject: [PATCH] deepfloyd

---
 .gitignore                                 |   1 +
 comfy_extras/nodes/deepfloyd/__init__.py   |  30 ++
 comfy_extras/nodes/deepfloyd/deep_floyd.py | 326 +++++++++++++++++++++
 requirements.txt                           |   4 +-
 4 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 comfy_extras/nodes/deepfloyd/deep_floyd.py

diff --git a/.gitignore b/.gitignore
index ffb9b1177..c42639937 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 /[Cc]ustom_nodes/*
 ![Cc]ustom_nodes/__init__.py
 !/custom_nodes/example_node.py.example
+**/put*here
 /extra_model_paths.yaml
 /.vs
 .idea/
diff --git a/comfy_extras/nodes/deepfloyd/__init__.py b/comfy_extras/nodes/deepfloyd/__init__.py
index e69de29bb..93b3e4bb9 100644
--- a/comfy_extras/nodes/deepfloyd/__init__.py
+++ b/comfy_extras/nodes/deepfloyd/__init__.py
@@ -0,0 +1,30 @@
+from transformers import logging as transformers_logging
+from diffusers import logging as diffusers_logging
+from warnings import filterwarnings
+import logging
+
+from .deep_floyd import *
+
+transformers_logging.set_verbosity_error()
+diffusers_logging.set_verbosity_error()
+logging.getLogger("xformers").addFilter(lambda r: "A matching Triton is not available" not in r.getMessage())
+filterwarnings("ignore", category=FutureWarning, message="The `reduce_labels` parameter is deprecated")
+filterwarnings("ignore", category=UserWarning, message="You seem to be using the pipelines sequentially on GPU")
+filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
+
+NODE_CLASS_MAPPINGS = {
+    # DeepFloyd
+    "IF Loader": Loader,
+    "IF Encoder": Encoder,
+    "IF Stage I": StageI,
+    "IF Stage II": StageII,
+    "IF Stage III": StageIII,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "IF Loader": "IF Loader",
+    "IF Encoder": "IF Encoder",
+    "IF Stage I": "IF Stage I",
+    "IF Stage II": "IF Stage II",
+    "IF Stage III": "IF Stage III",
+}
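[Reviewer note: the two mappings above are how ComfyUI discovers an extension's nodes. For
orientation, a minimal sketch of the class contract they assume; the node, category, and
method contents below are hypothetical, not part of this patch:

    class ExampleNode:
        @classmethod
        def INPUT_TYPES(cls):
            # ComfyUI builds the node's input sockets and widgets from this dict.
            return {"required": {"text": ("STRING", {"default": "", "multiline": True})}}

        CATEGORY = "examples"          # submenu the node appears under
        FUNCTION = "process"           # name of the method ComfyUI invokes with the inputs
        RETURN_TYPES = ("STRING",)     # one entry per output socket

        def process(self, text):
            return (text.upper(),)     # outputs are always returned as a tuple

    NODE_CLASS_MAPPINGS = {"Example Node": ExampleNode}

The Loader, Encoder, and Stage nodes registered above each follow this shape.]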
diff --git a/comfy_extras/nodes/deepfloyd/deep_floyd.py b/comfy_extras/nodes/deepfloyd/deep_floyd.py
new file mode 100644
index 000000000..529e19173
--- /dev/null
+++ b/comfy_extras/nodes/deepfloyd/deep_floyd.py
@@ -0,0 +1,326 @@
+import gc
+import json
+import os.path
+import typing
+
+import torch
+import torchvision.transforms.functional as TF
+from diffusers import DiffusionPipeline, IFPipeline, StableDiffusionUpscalePipeline, IFSuperResolutionPipeline
+from diffusers.utils import is_accelerate_available, is_accelerate_version
+from transformers import T5EncoderModel
+
+from comfy.model_management import throw_exception_if_processing_interrupted, get_torch_device, cpu_state, CPUState
+# todo: this relies on the setup-py cleanup fork
+from comfy.utils import ProgressBar, get_project_root
+
+# todo: find or download the models automatically by their config jsons instead of using well known names
+_model_base_path = os.path.join(get_project_root(), "models", "deepfloyd")
+
+
+def _find_files(directory: str, filename: str) -> typing.List[str]:
+    return [os.path.join(root, file) for root, _, files in os.walk(directory) for file in files if file == filename]
+
+
+# todo: ticket diffusers to correctly deal with an omitted unet
+def _patched_enable_model_cpu_offload_ifpipeline(self: IFPipeline | IFSuperResolutionPipeline, gpu_id=0):
+    r"""
+    Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance.
+    Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when
+    its `forward` method is called, and the model remains on the GPU until the next model runs. Memory savings
+    are lower than with `enable_sequential_cpu_offload`, but performance is much better due to the iterative
+    execution of the `unet`.
+    """
+    if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+        from accelerate import cpu_offload_with_hook
+    else:
+        raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+    if cpu_state == CPUState.GPU:
+        device = torch.device(f"cuda:{gpu_id}")
+    else:
+        device = get_torch_device()
+
+    if cpu_state == CPUState.CPU or cpu_state == CPUState.MPS:
+        return
+
+    if self.device.type != "cpu":
+        self.to("cpu", silence_dtype_warnings=True)
+        torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
+
+    hook = None
+
+    if self.text_encoder is not None:
+        _, hook = cpu_offload_with_hook(self.text_encoder, device, prev_module_hook=hook)
+
+        # Accelerate will move the next model to the device _before_ calling the offload hook of the
+        # previous model. This will cause both models to be present on the device at the same time.
+        # IF uses T5 for its text encoder which is really large. We can manually call the offload
+        # hook for the text encoder to ensure it's moved to the cpu before the unet is moved to
+        # the GPU.
+        self.text_encoder_offload_hook = hook
+
+    # todo: patch here
+    if self.unet is not None:
+        _, hook = cpu_offload_with_hook(self.unet, device, prev_module_hook=hook)
+
+        # if the safety checker isn't called, `unet_offload_hook` will have to be called to manually offload the unet
+        self.unet_offload_hook = hook
+
+    if self.safety_checker is not None:
+        _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
+
+    # We'll offload the last model manually.
+    self.final_offload_hook = hook
+
+
+def _cpu_offload(self: DiffusionPipeline, gpu_id=0):
+    # todo: use sequential for low vram, ordinary cpu offload for normal vram
+    if isinstance(self, IFPipeline) or isinstance(self, IFSuperResolutionPipeline):
+        _patched_enable_model_cpu_offload_ifpipeline(self, gpu_id)
+    # todo: include sequential usage
+    # elif isinstance(self, StableDiffusionUpscalePipeline):
+    #     self.enable_sequential_cpu_offload(gpu_id)
+    elif hasattr(self, 'enable_model_cpu_offload'):
+        self.enable_model_cpu_offload(gpu_id)
+
+
+class Loader:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model_name": (Loader._MODELS, {"default": "I-M"}),
+                "load_in_8bit": ([False, True], {"default": False}),
+                "device": ("STRING", {"default": ""}),
+            }
+        }
+
+    CATEGORY = "deepfloyd"
+    FUNCTION = "process"
+    RETURN_TYPES = ("IF_MODEL",)
+
+    _MODELS = ["I-M", "I-L", "I-XL", "II-M", "II-L", "III", "t5"]
+
+    # todo: correctly use load_in_8bit
+    def process(self, model_name: str, load_in_8bit: bool, device: str):
+        assert model_name in Loader._MODELS
+
+        model_v: DiffusionPipeline
+        model_path: str
+        kwargs = {
+            "variant": "fp16",
+            "torch_dtype": torch.float16,
+            "requires_safety_checker": False,
+            "feature_extractor": None,
+            "safety_checker": None,
+            "watermarker": None,
+            "load_in_8bit": load_in_8bit,
+            # todo: fix diffusers when using device_map auto on multi-gpu setups,
+            #  layers are not assigned to different devices correctly
+            "device_map": None if device else "auto"
+        }
+
+        if model_name == "t5":
+            # find any valid IF model and reuse its text encoder
+            model_path = next(os.path.dirname(file) for file in _find_files(_model_base_path, "model_index.json") if
+                              any(x == T5EncoderModel.__name__ for x in
+                                  json.load(open(file, 'r'))["text_encoder"]))
+            # todo: this must use load_in_8bit correctly
+            # kwargs["text_encoder"] = text_encoder
+            kwargs["unet"] = None
+        elif model_name == "III":
+            model_path = f"{_model_base_path}/stable-diffusion-x4-upscaler"
+            del kwargs["variant"]
+        else:
+            model_path = f"{_model_base_path}/IF-{model_name}-v1.0"
+            kwargs["text_encoder"] = None
+
+        model_v = DiffusionPipeline.from_pretrained(
+            pretrained_model_name_or_path=model_path,
+            **kwargs
+        )
+
+        if device:
+            model_v = model_v.to(device)
+
+        # device.index is None when the pipeline sits on the CPU; fall back to GPU 0
+        _cpu_offload(model_v, gpu_id=model_v.device.index or 0)
+
+        return (model_v,)
+
+
+class Encoder:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model": ("IF_MODEL",),
+                "positive": ("STRING", {"default": "", "multiline": True}),
+                "negative": ("STRING", {"default": "", "multiline": True}),
+            },
+        }
+
+    CATEGORY = "deepfloyd"
+    FUNCTION = "process"
+    MODEL = None
+    RETURN_TYPES = ("POSITIVE", "NEGATIVE",)
+    TEXT_ENCODER = None
+
+    def process(self, model: IFPipeline, positive, negative):
+        positive, negative = model.encode_prompt(
+            prompt=positive,
+            negative_prompt=negative,
+        )
+
+        return (positive, negative,)
+
+
+class StageI:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "positive": ("POSITIVE",),
+                "negative": ("NEGATIVE",),
+                "model": ("IF_MODEL",),
+                "width": ("INT", {"default": 64, "min": 8, "max": 128, "step": 8}),
+                "height": ("INT", {"default": 64, "min": 8, "max": 128, "step": 8}),
+                "batch_size": ("INT", {"default": 1, "min": 1, "max": 100}),
+                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+                "steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
+                "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0})
+            },
+        }
+
+    CATEGORY = "deepfloyd"
+    FUNCTION = "process"
+    RETURN_TYPES = ("IMAGE",)
+
+    def process(self, model: IFPipeline, positive, negative, width, height, batch_size, seed, steps, cfg):
+        progress = ProgressBar(steps)
+
+        def callback(step, time_step, latent):
+            throw_exception_if_processing_interrupted()
+            progress.update_absolute(step)
+
+        gc.collect()
+        image = model(
+            prompt_embeds=positive,
+            negative_prompt_embeds=negative,
+            width=width,
+            height=height,
+            generator=torch.manual_seed(seed),
+            guidance_scale=cfg,
+            num_images_per_prompt=batch_size,
+            num_inference_steps=steps,
+            callback=callback,
+            output_type="pt",
+        ).images
+
+        # diffusers returns BCHW in [-1, 1]; ComfyUI expects BHWC in [0, 1]
+        image = (image / 2 + 0.5).clamp(0, 1)
+        image = image.cpu().float().permute(0, 2, 3, 1)
+        return (image,)
+
+
+class StageII:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "positive": ("POSITIVE",),
+                "negative": ("NEGATIVE",),
+                "model": ("IF_MODEL",),
+                "images": ("IMAGE",),
+                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+                "steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
+                "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0}),
+            },
+        }
+
+    CATEGORY = "deepfloyd"
+    FUNCTION = "process"
+    RETURN_NAMES = ("IMAGES",)
+    RETURN_TYPES = ("IMAGE",)
+
+    def process(self, model, images, positive, negative, seed, steps, cfg):
+        images = images.permute(0, 3, 1, 2)
+        progress = ProgressBar(steps)
+        batch_size, channels, height, width = images.shape
+        max_dim = max(height, width)
+        # square the input for the IF upscaler, then crop back to 4x the original size afterwards
+        images = TF.center_crop(images, max_dim)
+        model.unet.config.sample_size = max_dim * 4
+
+        if batch_size > 1:
+            positive = positive.repeat(batch_size, 1, 1)
+            negative = negative.repeat(batch_size, 1, 1)
+
+        def callback(step, time_step, latent):
+            throw_exception_if_processing_interrupted()
+            progress.update_absolute(step)
+
+        images = model(
+            image=images,
+            prompt_embeds=positive,
+            negative_prompt_embeds=negative,
+            generator=torch.manual_seed(seed),
+            guidance_scale=cfg,
+            num_inference_steps=steps,
+            callback=callback,
+            output_type="pt",
+        ).images.cpu().float()
+
+        images = TF.center_crop(images, [height * 4, width * 4])
+        images = images.permute(0, 2, 3, 1)
+        return (images,)
+
+
+class StageIII:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model": ("IF_MODEL",),
+                "image": ("IMAGE",),
+                "tile": ([False, True], {"default": False}),
+                "tile_size": ("INT", {"default": 512, "min": 64, "max": 1024, "step": 64}),
+                "noise": ("INT", {"default": 20, "min": 0, "max": 100}),
+                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+                "steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
+                "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0}),
+                "positive": ("STRING", {"default": "", "multiline": True}),
+                "negative": ("STRING", {"default": "", "multiline": True}),
+            },
+        }
+
+    CATEGORY = "deepfloyd"
+    FUNCTION = "process"
+    RETURN_TYPES = ("IMAGE",)
+
+    def process(self, model: StableDiffusionUpscalePipeline, image, tile, tile_size, noise, seed, steps, cfg, positive,
+                negative):
+        image = image.permute(0, 3, 1, 2)
+        progress = ProgressBar(steps)
+        batch_size = image.shape[0]
+
+        if batch_size > 1:
+            positive = [positive] * batch_size
+            negative = [negative] * batch_size
+
+        if tile:
+            model.vae.config.sample_size = tile_size
+            model.vae.enable_tiling()
+
+        def callback(step, time_step, latent):
+            throw_exception_if_processing_interrupted()
+            progress.update_absolute(step)
+
+        image = model(
+            image=image,
+            prompt=positive,
+            negative_prompt=negative,
+            noise_level=noise,
+            generator=torch.manual_seed(seed),
+            guidance_scale=cfg,
+            num_inference_steps=steps,
+            callback=callback,
+            output_type="pt",
+        ).images.cpu().float().permute(0, 2, 3, 1)
+
+        return (image,)
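[Reviewer note on the offload patch above: `cpu_offload_with_hook` chains modules so that
moving one onto the GPU first evicts its predecessor. A minimal sketch of that pattern,
assuming accelerate >= 0.17; the `chain_offload` helper is ours, not part of this patch
or of accelerate:

    import torch
    from accelerate import cpu_offload_with_hook

    def chain_offload(modules, device=torch.device("cuda:0")):
        # Each module is moved to `device` when its forward() runs; passing the
        # previous hook makes that move offload the predecessor back to CPU, so
        # at most one large module occupies VRAM at a time.
        hook = None
        for module in modules:
            _, hook = cpu_offload_with_hook(module, device, prev_module_hook=hook)
        return hook  # call hook.offload() to evict the last module manually

The patched method applies this chain to text_encoder, unet, and safety_checker,
skipping whichever are None.

The stage nodes also convert between tensor conventions: the diffusers pipelines here
consume and produce BCHW tensors (Stage I's "pt" output comes back roughly in [-1, 1]),
while ComfyUI IMAGE tensors are BHWC float32 in [0, 1]. A small restatement of the
Stage I post-processing; the helper name is hypothetical:

    import torch

    def pt_to_comfy_image(images: torch.Tensor) -> torch.Tensor:
        images = (images / 2 + 0.5).clamp(0, 1)          # [-1, 1] -> [0, 1]
        return images.cpu().float().permute(0, 2, 3, 1)  # BCHW -> BHWC]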
diff --git a/requirements.txt b/requirements.txt
index 8ab608952..6cdf3380b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,4 +28,6 @@ tqdm
 diffusers>=0.16.1
 protobuf==3.20.3
 rembg
-psutil
\ No newline at end of file
+psutil
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.1.post1-py3-none-win_amd64.whl; platform_system == "Windows"
+bitsandbytes; platform_system != "Windows"
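[Reviewer note: for orientation, a hypothetical end-to-end use of these nodes outside the
graph UI, assuming the IF checkpoints and the x4 upscaler are present under
models/deepfloyd. Illustrative only; the prompts and settings are made up:

    (t5,) = Loader().process(model_name="t5", load_in_8bit=False, device="")
    positive, negative = Encoder().process(t5, "a photo of a corgi", "blurry, watermark")

    (stage_i,) = Loader().process(model_name="I-M", load_in_8bit=False, device="")
    (images,) = StageI().process(stage_i, positive, negative, width=64, height=64,
                                 batch_size=1, seed=0, steps=20, cfg=8.0)

    (stage_ii,) = Loader().process(model_name="II-M", load_in_8bit=False, device="")
    (images,) = StageII().process(stage_ii, images, positive, negative,
                                  seed=0, steps=20, cfg=8.0)

    (stage_iii,) = Loader().process(model_name="III", load_in_8bit=False, device="")
    (images,) = StageIII().process(stage_iii, images, tile=False, tile_size=512,
                                   noise=20, seed=0, steps=20, cfg=8.0,
                                   positive="a photo of a corgi",
                                   negative="blurry, watermark")

Note that Stage III is the Stable Diffusion x4 upscaler, so it takes raw prompt strings
rather than the T5 embeddings the IF stages consume.]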