diff --git a/.gitignore b/.gitignore index a46c3671c..bab63c0bc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,13 +2,14 @@ /[Oo]utput/ /[Ii]nput/ !/input/example.png -/[Mm]odels/ +/[Mm]odels/* +![Mm]odels/deepfloyd/put_deepfloyd_hugginface_repos_or_diffusers_cache_here + /[Tt]emp/ /[Cc]ustom_nodes/* ![Cc]ustom_nodes/__init__.py !/custom_nodes/example_node.py.example **/put*here -![Mm]odels/deepfloyd/put_deepfloyd_repos_here /extra_model_paths.yaml /.vs .idea/ diff --git a/comfy/nodes/package_typing.py b/comfy/nodes/package_typing.py index 21da741c5..bf83c0a7e 100644 --- a/comfy/nodes/package_typing.py +++ b/comfy/nodes/package_typing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import typing from typing import Protocol, ClassVar, Tuple, Dict from dataclasses import dataclass, field @@ -8,7 +9,7 @@ class CustomNode(Protocol): @classmethod def INPUT_TYPES(cls) -> dict: ... - RETURN_TYPES: ClassVar[Tuple[str]] + RETURN_TYPES: ClassVar[typing.Sequence[str]] RETURN_NAMES: ClassVar[Tuple[str]] = None OUTPUT_IS_LIST: ClassVar[Tuple[bool]] = None INPUT_IS_LIST: ClassVar[bool] = None diff --git a/comfy_extras/nodes/deepfloyd/__init__.py b/comfy_extras/nodes/deepfloyd/__init__.py index 93b3e4bb9..37760f3bb 100644 --- a/comfy_extras/nodes/deepfloyd/__init__.py +++ b/comfy_extras/nodes/deepfloyd/__init__.py @@ -14,17 +14,17 @@ filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecat NODE_CLASS_MAPPINGS = { # DeepFloyd - "IF Loader": Loader, - "IF Encoder": Encoder, - "IF Stage I": StageI, - "IF Stage II": StageII, - "IF Stage III": StageIII, + "IFLoader": IFLoader, + "IFEncoder": IFEncoder, + "IFStageI": IFStageI, + "IFStageII": IFStageII, + "IFStageIII": IFStageIII, } NODE_DISPLAY_NAME_MAPPINGS = { - "IF Loader": "IF Loader", - "IF Encoder": "IF Encoder", - "IF Stage I": "IF Stage I", - "IF Stage II": "IF Stage II", - "IF Stage III": "IF Stage III", + "IFLoader": "DeepFloyd IF Loader", + "IFEncoder": "DeepFloyd IF Encoder", + "IFStageI": "DeepFloyd IF Stage I", + "IFStageII": "DeepFloyd IF Stage II", + "IFStageIII": "DeepFloyd IF Stage III", } diff --git a/comfy_extras/nodes/deepfloyd/deep_floyd.py b/comfy_extras/nodes/deepfloyd/deep_floyd.py index 507a94757..f3e8670f0 100644 --- a/comfy_extras/nodes/deepfloyd/deep_floyd.py +++ b/comfy_extras/nodes/deepfloyd/deep_floyd.py @@ -4,13 +4,12 @@ import os.path import typing import torch -import torchvision.transforms.functional as TF from diffusers import DiffusionPipeline, IFPipeline, StableDiffusionUpscalePipeline, IFSuperResolutionPipeline from diffusers.utils import is_accelerate_available, is_accelerate_version from transformers import T5EncoderModel, BitsAndBytesConfig from comfy.model_management import throw_exception_if_processing_interrupted, get_torch_device, cpu_state, CPUState -# todo: this relies on the setup-py cleanup fork +from comfy.nodes.package_typing import CustomNode from comfy.utils import ProgressBar, get_project_root # todo: find or download the models automatically by their config jsons instead of using well known names @@ -83,13 +82,16 @@ def _cpu_offload(self: DiffusionPipeline, gpu_id=0): self.enable_model_cpu_offload(gpu_id) -class Loader: +class IFLoader(CustomNode): @classmethod def INPUT_TYPES(s): return { "required": { - "model_name": (Loader._MODELS, {"default": "I-M"}), - "quantization": (list(Loader._QUANTIZATIONS.keys()), {"default": "16-bit"}), + "model_name": (IFLoader._MODELS, {"default": "I-M"}), + "quantization": (list(IFLoader._QUANTIZATIONS.keys()), {"default": "16-bit"}), + }, + "optional": { + "hugging_face_token": ("STRING", {"default": ""}), } } @@ -110,9 +112,8 @@ class Loader: "16-bit": None, } - # todo: correctly use load_in_8bit - def process(self, model_name: str, quantization: str): - assert model_name in Loader._MODELS + def process(self, model_name: str, quantization: str, hugging_face_token: str = ""): + assert model_name in IFLoader._MODELS model_v: DiffusionPipeline model_path: str @@ -126,14 +127,22 @@ class Loader: "device_map": None } - if Loader._QUANTIZATIONS[quantization] is not None: - kwargs['quantization_config'] = Loader._QUANTIZATIONS[quantization] + if hugging_face_token is not None and hugging_face_token != "": + kwargs['access_token'] = hugging_face_token + elif 'HUGGING_FACE_HUB_TOKEN' in os.environ: + pass + + if IFLoader._QUANTIZATIONS[quantization] is not None: + kwargs['quantization_config'] = IFLoader._QUANTIZATIONS[quantization] if model_name == "t5": # find any valid IF model - model_path = next(os.path.dirname(file) for file in _find_files(_model_base_path, "model_index.json") if - any(x == T5EncoderModel.__name__ for x in - json.load(open(file, 'r'))["text_encoder"])) + try: + model_path = next(os.path.dirname(file) for file in _find_files(_model_base_path, "model_index.json") if + any(x == T5EncoderModel.__name__ for x in + json.load(open(file, 'r'))["text_encoder"])) + except: + model_path = "DeepFloyd/IF-I-M-v1.0" kwargs["unet"] = None elif model_name == "III": model_path = f"{_model_base_path}/stable-diffusion-x4-upscaler" @@ -142,6 +151,13 @@ class Loader: model_path = f"{_model_base_path}/IF-{model_name}-v1.0" kwargs["text_encoder"] = None + if not os.path.exists(model_path): + kwargs['cache_dir='] = os.path.abspath(_model_base_path) + if model_name == "t5": + model_path = "DeepFloyd/IF-I-M-v1.0" + else: + model_path = f"DeepFloyd/IF-{model_name}-v1.0" + model_v = DiffusionPipeline.from_pretrained( pretrained_model_name_or_path=model_path, **kwargs @@ -155,7 +171,7 @@ class Loader: return (model_v,) -class Encoder: +class IFEncoder(CustomNode): @classmethod def INPUT_TYPES(s): return { @@ -168,9 +184,7 @@ class Encoder: CATEGORY = "deepfloyd" FUNCTION = "process" - MODEL = None RETURN_TYPES = ("POSITIVE", "NEGATIVE",) - TEXT_ENCODER = None def process(self, model: IFPipeline, positive, negative): positive, negative = model.encode_prompt( @@ -181,7 +195,7 @@ class Encoder: return (positive, negative,) -class StageI: +class IFStageI: @classmethod def INPUT_TYPES(s): return { @@ -228,7 +242,7 @@ class StageI: return (image,) -class StageII: +class IFStageII: @classmethod def INPUT_TYPES(s): return { @@ -251,10 +265,7 @@ class StageII: def process(self, model, images, positive, negative, seed, steps, cfg): images = images.permute(0, 3, 1, 2) progress = ProgressBar(steps) - batch_size, channels, height, width = images.shape - max_dim = max(height, width) - images = TF.center_crop(images, max_dim) - model.unet.config.sample_size = max_dim * 4 + batch_size = images.shape[0] if batch_size > 1: positive = positive.repeat(batch_size, 1, 1) @@ -268,19 +279,22 @@ class StageII: image=images, prompt_embeds=positive, negative_prompt_embeds=negative, + height=images.shape[2] // 8 * 8 * 4, + width=images.shape[3] // 8 * 8 * 4, generator=torch.manual_seed(seed), guidance_scale=cfg, num_inference_steps=steps, callback=callback, output_type="pt", - ).images.cpu().float() + ).images - images = TF.center_crop(images, [height * 4, width * 4]) + images = images.clamp(0, 1) images = images.permute(0, 2, 3, 1) + images = images.to("cpu", torch.float32) return (images,) -class StageIII: +class IFStageIII: @classmethod def INPUT_TYPES(s): return { diff --git a/comfy_extras/nodes/pixel_art/k_centroid_downscale.py b/comfy_extras/nodes/pixel_art/k_centroid_downscale.py index 2f70c694c..a2850826d 100644 --- a/comfy_extras/nodes/pixel_art/k_centroid_downscale.py +++ b/comfy_extras/nodes/pixel_art/k_centroid_downscale.py @@ -11,9 +11,12 @@ import numpy as np from PIL import Image import torch +from comfy.nodes.package_typing import CustomNode + MAX_RESOLUTION = 1024 AUTO_FACTOR = 8 + def k_centroid_downscale(images, width, height, centroids=2): '''k-centroid scaling, based on: https://github.com/Astropulse/stable-diffusion-aseprite/blob/main/scripts/image_server.py.''' @@ -31,13 +34,13 @@ def k_centroid_downscale(images, width, height, centroids=2): # get most common (median) color color_counts = tile.getcolors() most_common_idx = max(color_counts, key=lambda x: x[0])[1] - downscaled[ii, y, x, :] = tile.getpalette()[most_common_idx*3:(most_common_idx + 1)*3] + downscaled[ii, y, x, :] = tile.getpalette()[most_common_idx * 3:(most_common_idx + 1) * 3] downscaled = downscaled.astype(np.float32) / 255.0 return torch.from_numpy(downscaled) -class ImageKCentroidDownscale: +class ImageKCentroidDownscale(CustomNode): @classmethod def INPUT_TYPES(s): return { @@ -58,7 +61,8 @@ class ImageKCentroidDownscale: s = k_centroid_downscale(image, width, height, centroids) return (s,) -class ImageKCentroidAutoDownscale: + +class ImageKCentroidAutoDownscale(CustomNode): @classmethod def INPUT_TYPES(s): return { diff --git a/comfy_extras/nodes/pixel_art/make_model_tileable.py b/comfy_extras/nodes/pixel_art/make_model_tileable.py index b2192dbd8..4d78f6952 100644 --- a/comfy_extras/nodes/pixel_art/make_model_tileable.py +++ b/comfy_extras/nodes/pixel_art/make_model_tileable.py @@ -10,6 +10,8 @@ import torch from torch.nn import functional as F from torch.nn.modules.utils import _pair +from comfy.nodes.package_typing import CustomNode + def flatten_modules(m): '''Return submodules of module m in flattened form.''' @@ -38,7 +40,7 @@ def __replacementConv2DConvForward(self, input: torch.Tensor, weight: torch.Tens return F.conv2d(working, weight, bias, self.stride, _pair(0), self.dilation, self.groups) -class MakeModelTileable: +class MakeModelTileable(CustomNode): @classmethod def INPUT_TYPES(s): return { diff --git a/comfy_extras/nodes/pixel_art/mask_ops.py b/comfy_extras/nodes/pixel_art/mask_ops.py index bd870eab4..b549fc391 100644 --- a/comfy_extras/nodes/pixel_art/mask_ops.py +++ b/comfy_extras/nodes/pixel_art/mask_ops.py @@ -7,8 +7,10 @@ import numpy as np import rembg import torch +from comfy.nodes.package_typing import CustomNode -class BinarizeMask: + +class BinarizeMask(CustomNode): '''Binarize (threshold) a mask.''' @classmethod @@ -36,7 +38,7 @@ class BinarizeMask: return (s,) -class ImageCutout: +class ImageCutout(CustomNode): '''Perform basic image cutout (adds alpha channel from mask).''' @classmethod @@ -65,4 +67,3 @@ NODE_CLASS_MAPPINGS = { "BinarizeMask": BinarizeMask, "ImageCutout": ImageCutout, } - diff --git a/comfy_extras/nodes/pixel_art/palettize.py b/comfy_extras/nodes/pixel_art/palettize.py index b092997b6..19fc68190 100644 --- a/comfy_extras/nodes/pixel_art/palettize.py +++ b/comfy_extras/nodes/pixel_art/palettize.py @@ -9,6 +9,8 @@ import numpy as np from PIL import Image import torch +from comfy.nodes.package_typing import CustomNode + PALETTES_PATH = os.path.join(os.path.dirname(__file__), '../../..', 'palettes') PAL_EXT = '.png' @@ -18,6 +20,7 @@ QUANTIZE_METHODS = { 'fast_octree': Image.Quantize.FASTOCTREE, } + # Determine optimal number of colors. # FROM: astropulse/sd-palettize # @@ -59,8 +62,10 @@ def determine_best_k(image, max_k, quantize_method=Image.Quantize.FASTOCTREE): return best_k + palette_warned = False + def list_palettes(): global palette_warned palettes = [] @@ -72,7 +77,8 @@ def list_palettes(): pass if not palettes and not palette_warned: palette_warned = True - print("ImagePalettize warning: no fixed palettes found. You can put these in the palettes/ directory below the ComfyUI root.") + print( + "ImagePalettize warning: no fixed palettes found. You can put these in the palettes/ directory below the ComfyUI root.") return palettes @@ -90,7 +96,7 @@ def load_palette(name): return get_image_colors(Image.open(os.path.join(PALETTES_PATH, name + PAL_EXT))) -class ImagePalettize: +class ImagePalettize(CustomNode): @classmethod def INPUT_TYPES(s): return { @@ -122,7 +128,7 @@ class ImagePalettize: if palette not in {'auto_best_k', 'auto_fixed_k'}: pal_entries = load_palette(palette) k = len(pal_entries) // 3 - pal_img = Image.new('P', (1, 1)) # image size doesn't matter it only holds the palette + pal_img = Image.new('P', (1, 1)) # image size doesn't matter it only holds the palette pal_img.putpalette(pal_entries) results = [] @@ -143,7 +149,7 @@ class ImagePalettize: results.append(np.array(i)) result = np.array(results).astype(np.float32) / 255.0 - return (torch.from_numpy(result), ) + return (torch.from_numpy(result),) NODE_CLASS_MAPPINGS = { diff --git a/comfy_extras/nodes/pixel_art/patterngen.py b/comfy_extras/nodes/pixel_art/patterngen.py index 76d588af6..d35bd24c6 100644 --- a/comfy_extras/nodes/pixel_art/patterngen.py +++ b/comfy_extras/nodes/pixel_art/patterngen.py @@ -9,9 +9,12 @@ import numpy as np from PIL import Image import torch +from comfy.nodes.package_typing import CustomNode + MAX_RESOLUTION = 8192 -class ImageSolidColor: + +class ImageSolidColor(CustomNode): @classmethod def INPUT_TYPES(s): return { @@ -32,7 +35,7 @@ class ImageSolidColor: def render(self, width, height, r, g, b): color = torch.tensor([r, g, b]) / 255.0 result = color.expand(1, height, width, 3) - return (result, ) + return (result,) NODE_CLASS_MAPPINGS = { @@ -42,4 +45,3 @@ NODE_CLASS_MAPPINGS = { NODE_DISPLAY_NAME_MAPPINGS = { "ImageSolidColor": "Solid Color", } - diff --git a/comfy_extras/nodes/pixel_art/remove_background.py b/comfy_extras/nodes/pixel_art/remove_background.py index 6ba498c5b..979256d94 100644 --- a/comfy_extras/nodes/pixel_art/remove_background.py +++ b/comfy_extras/nodes/pixel_art/remove_background.py @@ -7,11 +7,12 @@ import numpy as np import rembg import torch +from comfy.nodes.package_typing import CustomNode MODELS = rembg.sessions.sessions_names -class ImageRemoveBackground: +class ImageRemoveBackground(CustomNode): '''Remove background from image (adds an alpha channel)''' @classmethod @@ -59,17 +60,18 @@ class ImageRemoveBackground: i = 255. * i.cpu().numpy() i = np.clip(i, 0, 255).astype(np.uint8) i = rembg.remove(i, - alpha_matting=(alpha_matting == "enabled"), - alpha_matting_foreground_threshold=am_foreground_thr, - alpha_matting_background_threshold=am_background_thr, - alpha_matting_erode_size=am_erode_size, - session=session, - ) + alpha_matting=(alpha_matting == "enabled"), + alpha_matting_foreground_threshold=am_foreground_thr, + alpha_matting_background_threshold=am_background_thr, + alpha_matting_erode_size=am_erode_size, + session=session, + ) results.append(i.astype(np.float32) / 255.0) s = torch.from_numpy(np.array(results)) return (s,) + class ImageEstimateForegroundMask: ''' Return a mask of which pixels are estimated to belong to foreground. diff --git a/requirements.txt b/requirements.txt index 6cdf3380b..0172d926b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,5 +29,4 @@ diffusers>=0.16.1 protobuf==3.20.3 rembg psutil -https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.1.post1-py3-none-win_amd64.whl; platform_system == "Windows" -bitsandbytes; platform_system != "Windows" \ No newline at end of file +bitsandbytes>=0.40.1 \ No newline at end of file diff --git a/setup.py b/setup.py index 86a62dcc4..5fa71187e 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,11 @@ Packages that should have a specific option set when a GPU accelerator is presen """ gpu_accelerated_packages = {"rembg": "rembg[gpu]"} +""" +The URL to the bitsandbytes package to use on Windows +""" +bitsandbytes_windows = "https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.1.post1-py3-none-win_amd64.whl" + """ Indicates if we're installing an editable (develop) mode package """ @@ -152,6 +157,8 @@ def dependencies() -> [str]: requirement = InstallRequirement(Requirement(package), comes_from=f"{package_name}=={version}") candidate = finder.find_best_candidate(requirement.name, requirement.specifier) if candidate.best_candidate is not None: + if requirement.name == "bitsandbytes" and platform.system().lower() == 'windows': + _dependencies[i] = f"{requirement.name} @ {bitsandbytes_windows}" if gpu_accelerated and requirement.name in gpu_accelerated_packages: _dependencies[i] = gpu_accelerated_packages[requirement.name] if any([url in candidate.best_candidate.link.url for url in _alternative_indices]):