Known Flux controlnet models

doctorpangloss 2024-11-22 18:00:29 -08:00
parent 4b77c4941c
commit b1ad9cad37
5 changed files with 684 additions and 13 deletions

View File

@@ -1,22 +1,25 @@
from .component_model import files
from .model_management import load_models_gpu
from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace
import torch
import json
import logging
from . import ops
from . import model_patcher
from . import model_management
import torch
from . import clip_model
from . import model_management
from . import model_patcher
from . import ops
from .component_model import files
from .model_management import load_models_gpu
from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace
class Output:
def __getitem__(self, key):
return getattr(self, key)
def __setitem__(self, key, item):
setattr(self, key, item)
def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]):
mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
std = torch.tensor(std, device=image.device, dtype=image.dtype)
@@ -24,11 +27,12 @@ def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], s
if not (image.shape[2] == size and image.shape[3] == size):
scale = (size / min(image.shape[2], image.shape[3]))
image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True)
h = (image.shape[2] - size)//2
w = (image.shape[3] - size)//2
image = image[:,:,h:h+size,w:w+size]
h = (image.shape[2] - size) // 2
w = (image.shape[3] - size) // 2
image = image[:, :, h:h + size, w:w + size]
image = torch.clip((255. * image), 0, 255).round() / 255.0
return (image - mean.view([3,1,1])) / std.view([3,1,1])
return (image - mean.view([3, 1, 1])) / std.view([3, 1, 1])
class ClipVisionModel():
def __init__(self, json_config: dict | str):
@@ -53,6 +57,7 @@ class ClipVisionModel():
self.model.eval()
self.patcher = model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
def load_sd(self, sd):
return self.model.load_state_dict(sd, strict=False)
@@ -70,6 +75,7 @@ class ClipVisionModel():
outputs["penultimate_hidden_states"] = out[1].to(model_management.intermediate_device())
return outputs
def convert_to_transformers(sd, prefix):
sd_k = sd.keys()
if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in sd_k:
@@ -96,6 +102,7 @@ def convert_to_transformers(sd, prefix):
sd = state_dict_prefix_replace(sd, replace_prefix)
return sd
def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
if convert_keys:
sd = convert_to_transformers(sd, prefix)
@@ -105,7 +112,7 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
json_config = files.get_path_as_dict(None, "clip_vision_config_h.json")
elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
json_config = files.get_path_as_dict(None, "clip_vision_siglip_384.json")
elif sd["vision_model.embeddings.position_embedding.weight"].shape[0] == 577:
json_config = files.get_path_as_dict(None, "clip_vision_config_vitl_336.json")
else:
@@ -124,6 +131,7 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
sd.pop(k)
return clip
def load(ckpt_path):
sd = load_torch_file(ckpt_path)
if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd:

View File

@@ -283,7 +283,8 @@ KNOWN_GLIGEN_MODELS: Final[KnownDownloadables] = KnownDownloadables([
], folder_name="gligen")
KNOWN_CLIP_VISION_MODELS: Final[KnownDownloadables] = KnownDownloadables([
HuggingFile("comfyanonymous/clip_vision_g", "clip_vision_g.safetensors")
HuggingFile("comfyanonymous/clip_vision_g", "clip_vision_g.safetensors"),
HuggingFile("Comfy-Org/sigclip_vision_384", "sigclip_vision_patch14_384.safetensors"),
], folder_name="clip_vision")
KNOWN_LORAS: Final[KnownDownloadables] = KnownDownloadables([
@@ -292,6 +293,8 @@ KNOWN_LORAS: Final[KnownDownloadables] = KnownDownloadables([
CivitFile(model_id=47085, model_version_id=55199, filename="GoodHands-beta2.safetensors"),
HuggingFile("ByteDance/Hyper-SD", "Hyper-SDXL-12steps-CFG-lora.safetensors"),
HuggingFile("ByteDance/Hyper-SD", "Hyper-SD15-12steps-CFG-lora.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-Canny-dev-lora", "flux1-canny-dev-lora.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-Depth-dev-lora", "flux1-depth-dev-lora.safetensors"),
], folder_name="loras")
KNOWN_CONTROLNETS: Final[KnownDownloadables] = KnownDownloadables([
@@ -434,6 +437,9 @@ KNOWN_UNET_MODELS: Final[KnownDownloadables] = KnownDownloadables([
HuggingFile("ByteDance/Hyper-SD", "Hyper-SDXL-1step-Unet-Comfyui.fp16.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-schnell", "flux1-schnell.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-dev", "flux1-dev.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-Fill-dev", "flux1-fill-dev.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-Canny-dev", "flux1-canny-dev.safetensors"),
HuggingFile("black-forest-labs/FLUX.1-Depth-dev", "flux1-depth-dev.safetensors"),
HuggingFile("Kijai/flux-fp8", "flux1-dev-fp8.safetensors"),
HuggingFile("Kijai/flux-fp8", "flux1-schnell-fp8.safetensors"),
HuggingFile("Comfy-Org/mochi_preview_repackaged", "split_files/diffusion_models/mochi_preview_bf16.safetensors"),
@@ -452,6 +458,10 @@ KNOWN_CLIP_MODELS: Final[KnownDownloadables] = KnownDownloadables([
HuggingFile("zer0int/CLIP-GmP-ViT-L-14", "ViT-L-14-TEXT-detail-improved-hiT-GmP-TE-only-HF.safetensors"),
], folder_name="clip")
KNOWN_STYLE_MODELS: Final[KnownDownloadables] = KnownDownloadables([
HuggingFile("black-forest-labs/FLUX.1-Redux-dev", "flux1-redux-dev.safetensors"),
], folder_name="style_models")
_known_models_db: list[KnownDownloadables] = [
KNOWN_CHECKPOINTS,
KNOWN_VAES,
@@ -466,6 +476,7 @@ _known_models_db: list[KnownDownloadables] = [
KNOWN_IMAGE_ONLY_CHECKPOINTS,
KNOWN_UNCLIP_CHECKPOINTS,
KNOWN_UPSCALERS,
KNOWN_STYLE_MODELS,
]
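
Each entry above pairs a Hugging Face repository id with a file name. As a rough, hand-rolled illustration (not this repository's own download machinery), the newly registered files can be fetched directly with huggingface_hub; the black-forest-labs FLUX.1 dev repositories are gated, so an accepted license and access token may be required:

from huggingface_hub import hf_hub_download

# Fetch two of the files added to the known-model lists in this commit.
# Gated repos (FLUX.1 dev family) need `huggingface-cli login` or HF_TOKEN set.
canny_lora = hf_hub_download(
    repo_id="black-forest-labs/FLUX.1-Canny-dev-lora",
    filename="flux1-canny-dev-lora.safetensors",
)
redux_style = hf_hub_download(
    repo_id="black-forest-labs/FLUX.1-Redux-dev",
    filename="flux1-redux-dev.safetensors",
)
print(canny_lora, redux_style)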

View File

@@ -0,0 +1,193 @@
{
"3": {
"inputs": {
"seed": 432318046789205,
"steps": 20,
"cfg": 1,
"sampler_name": "euler",
"scheduler": "normal",
"denoise": 1,
"model": [
"31",
0
],
"positive": [
"35",
0
],
"negative": [
"35",
1
],
"latent_image": [
"35",
2
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"7": {
"inputs": {
"text": "",
"clip": [
"34",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Negative Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"3",
0
],
"vae": [
"32",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"9": {
"inputs": {
"filename_prefix": "ComfyUI",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"18": {
"inputs": {
"low_threshold": 0.15,
"high_threshold": 0.3,
"image": [
"36",
0
]
},
"class_type": "Canny",
"_meta": {
"title": "Canny"
}
},
"19": {
"inputs": {
"images": [
"18",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"23": {
"inputs": {
"text": "cute anime girl with massive fluffy fennec ears and a big fluffy tail blonde messy long hair blue eyes wearing a pink sweater and jeans",
"clip": [
"34",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Positive Prompt)"
}
},
"26": {
"inputs": {
"guidance": 30,
"conditioning": [
"23",
0
]
},
"class_type": "FluxGuidance",
"_meta": {
"title": "FluxGuidance"
}
},
"31": {
"inputs": {
"unet_name": "flux1-canny-dev.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"32": {
"inputs": {
"vae_name": "ae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"34": {
"inputs": {
"clip_name1": "clip_l.safetensors",
"clip_name2": "t5xxl_fp16.safetensors",
"type": "flux"
},
"class_type": "DualCLIPLoader",
"_meta": {
"title": "DualCLIPLoader"
}
},
"35": {
"inputs": {
"positive": [
"26",
0
],
"negative": [
"7",
0
],
"vae": [
"32",
0
],
"pixels": [
"18",
0
]
},
"class_type": "InstructPixToPixConditioning",
"_meta": {
"title": "InstructPixToPixConditioning"
}
},
"36": {
"inputs": {
"value": "https://comfyanonymous.github.io/ComfyUI_examples/flux/flux_fill_inpaint_example.png",
"name": "",
"title": "",
"description": "",
"__required": true
},
"class_type": "ImageRequestParameter",
"_meta": {
"title": "ImageRequestParameter"
}
}
}
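
The three JSON files added by this commit are ComfyUI API-format graphs (node id mapped to class_type and inputs). A minimal sketch of queueing the Canny graph above against a locally running server, assuming the default listen address 127.0.0.1:8188 and a hypothetical local copy named flux_canny_dev.json (the actual file name in this commit is not shown in this view):

import json
import urllib.request

with open("flux_canny_dev.json") as f:  # hypothetical local copy of the graph above
    graph = json.load(f)

# POST the graph to ComfyUI's /prompt endpoint; the response carries a
# prompt_id that can later be looked up under /history/<prompt_id>.
req = urllib.request.Request(
    "http://127.0.0.1:8188/prompt",
    data=json.dumps({"prompt": graph}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))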

View File

@@ -0,0 +1,201 @@
{
"3": {
"inputs": {
"seed": 164211176398261,
"steps": 20,
"cfg": 1,
"sampler_name": "euler",
"scheduler": "normal",
"denoise": 1,
"model": [
"39",
0
],
"positive": [
"38",
0
],
"negative": [
"38",
1
],
"latent_image": [
"38",
2
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"7": {
"inputs": {
"text": "",
"clip": [
"34",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Negative Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"3",
0
],
"vae": [
"32",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"9": {
"inputs": {
"filename_prefix": "ComfyUI",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"23": {
"inputs": {
"text": "beautiful scenery",
"clip": [
"34",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Positive Prompt)"
}
},
"26": {
"inputs": {
"guidance": 30,
"conditioning": [
"23",
0
]
},
"class_type": "FluxGuidance",
"_meta": {
"title": "FluxGuidance"
}
},
"31": {
"inputs": {
"unet_name": "flux1-fill-dev.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"32": {
"inputs": {
"vae_name": "ae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"34": {
"inputs": {
"clip_name1": "clip_l.safetensors",
"clip_name2": "t5xxl_fp16.safetensors",
"type": "flux"
},
"class_type": "DualCLIPLoader",
"_meta": {
"title": "DualCLIPLoader"
}
},
"38": {
"inputs": {
"noise_mask": false,
"positive": [
"26",
0
],
"negative": [
"7",
0
],
"vae": [
"32",
0
],
"pixels": [
"44",
0
],
"mask": [
"44",
1
]
},
"class_type": "InpaintModelConditioning",
"_meta": {
"title": "InpaintModelConditioning"
}
},
"39": {
"inputs": {
"model": [
"31",
0
]
},
"class_type": "DifferentialDiffusion",
"_meta": {
"title": "Differential Diffusion"
}
},
"44": {
"inputs": {
"left": 400,
"top": 0,
"right": 400,
"bottom": 400,
"feathering": 24,
"image": [
"45",
0
]
},
"class_type": "ImagePadForOutpaint",
"_meta": {
"title": "Pad Image for Outpainting"
}
},
"45": {
"inputs": {
"value": "https://comfyanonymous.github.io/ComfyUI_examples/flux/flux_fill_inpaint_example.png",
"name": "",
"title": "",
"description": "",
"__required": true
},
"class_type": "ImageRequestParameter",
"_meta": {
"title": "ImageRequestParameter"
}
}
}
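
For reference, node 44 (ImagePadForOutpaint) enlarges the canvas by the requested margins before the fill model paints them in, so the output resolution follows directly from the padding values. A small worked sketch of that arithmetic with this graph's settings, using an assumed 1024x1024 input purely for illustration:

def padded_size(width, height, left=400, top=0, right=400, bottom=400):
    # Canvas grows by left+right horizontally and top+bottom vertically;
    # feathering (24 px here) only softens the mask edge, not the size.
    return width + left + right, height + top + bottom

print(padded_size(1024, 1024))  # -> (1824, 1424) for the assumed input size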

View File

@@ -0,0 +1,258 @@
{
"6": {
"inputs": {
"text": "cute anime girl with massive fluffy fennec ears",
"clip": [
"11",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Positive Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"13",
0
],
"vae": [
"10",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"9": {
"inputs": {
"filename_prefix": "ComfyUI",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"10": {
"inputs": {
"vae_name": "ae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"11": {
"inputs": {
"clip_name1": "t5xxl_fp16.safetensors",
"clip_name2": "clip_l.safetensors",
"type": "flux"
},
"class_type": "DualCLIPLoader",
"_meta": {
"title": "DualCLIPLoader"
}
},
"12": {
"inputs": {
"unet_name": "flux1-dev.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"13": {
"inputs": {
"noise": [
"25",
0
],
"guider": [
"22",
0
],
"sampler": [
"16",
0
],
"sigmas": [
"17",
0
],
"latent_image": [
"27",
0
]
},
"class_type": "SamplerCustomAdvanced",
"_meta": {
"title": "SamplerCustomAdvanced"
}
},
"16": {
"inputs": {
"sampler_name": "euler"
},
"class_type": "KSamplerSelect",
"_meta": {
"title": "KSamplerSelect"
}
},
"17": {
"inputs": {
"scheduler": "simple",
"steps": 1,
"denoise": 1,
"model": [
"30",
0
]
},
"class_type": "BasicScheduler",
"_meta": {
"title": "BasicScheduler"
}
},
"22": {
"inputs": {
"model": [
"30",
0
],
"conditioning": [
"41",
0
]
},
"class_type": "BasicGuider",
"_meta": {
"title": "BasicGuider"
}
},
"25": {
"inputs": {
"noise_seed": 895731728473880
},
"class_type": "RandomNoise",
"_meta": {
"title": "RandomNoise"
}
},
"26": {
"inputs": {
"guidance": 3.5,
"conditioning": [
"6",
0
]
},
"class_type": "FluxGuidance",
"_meta": {
"title": "FluxGuidance"
}
},
"27": {
"inputs": {
"width": 1024,
"height": 1024,
"batch_size": 1
},
"class_type": "EmptySD3LatentImage",
"_meta": {
"title": "EmptySD3LatentImage"
}
},
"30": {
"inputs": {
"max_shift": 1.15,
"base_shift": 0.5,
"width": 1024,
"height": 1024,
"model": [
"12",
0
]
},
"class_type": "ModelSamplingFlux",
"_meta": {
"title": "ModelSamplingFlux"
}
},
"38": {
"inputs": {
"clip_name": "sigclip_vision_patch14_384.safetensors"
},
"class_type": "CLIPVisionLoader",
"_meta": {
"title": "Load CLIP Vision"
}
},
"39": {
"inputs": {
"clip_vision": [
"38",
0
],
"image": [
"44",
0
]
},
"class_type": "CLIPVisionEncode",
"_meta": {
"title": "CLIP Vision Encode"
}
},
"41": {
"inputs": {
"conditioning": [
"26",
0
],
"style_model": [
"42",
0
],
"clip_vision_output": [
"39",
0
]
},
"class_type": "StyleModelApply",
"_meta": {
"title": "Apply Style Model"
}
},
"42": {
"inputs": {
"style_model_name": "flux1-redux-dev.safetensors"
},
"class_type": "StyleModelLoader",
"_meta": {
"title": "Load Style Model"
}
},
"44": {
"inputs": {
"value": "https://comfyanonymous.github.io/ComfyUI_examples/flux/flux_fill_inpaint_example.png",
"name": "",
"title": "",
"description": "",
"__required": true
},
"class_type": "ImageRequestParameter",
"_meta": {
"title": "ImageRequestParameter"
}
}
}
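
The Redux graph reads its reference image from node 44 (ImageRequestParameter), which defaults to the example URL above. A small sketch of retargeting that reference by editing the loaded JSON before submission, assuming a hypothetical local copy named flux_redux_dev.json (how ImageRequestParameter values are overridden per request is not shown in this diff):

import json

with open("flux_redux_dev.json") as f:  # hypothetical local copy of the graph above
    graph = json.load(f)

# Swap in a different style/reference image; any URL that ImageRequestParameter
# can resolve would work here.
graph["44"]["inputs"]["value"] = "https://example.com/reference.png"

# The edited graph can then be queued via /prompt exactly as in the Canny sketch.
print(json.dumps(graph["44"], indent=2))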