Merge branch 'master' of github.com:comfyanonymous/ComfyUI

doctorpangloss 2024-02-26 08:51:07 -08:00
commit bd5073caf2
22 changed files with 160 additions and 290 deletions

View File

@@ -1,6 +1,9 @@
import pygit2
from datetime import datetime
import sys
import os
import shutil
import filecmp
def pull(repo, remote_name='origin', branch='master'):
for remote in repo.remotes:
@@ -42,7 +45,8 @@ def pull(repo, remote_name='origin', branch='master'):
raise AssertionError('Unknown merge analysis result')
pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0)
repo = pygit2.Repository(str(sys.argv[1]))
repo_path = str(sys.argv[1])
repo = pygit2.Repository(repo_path)
ident = pygit2.Signature('comfyui', 'comfy@ui')
try:
print("stashing current changes")
@@ -51,7 +55,10 @@ except KeyError:
print("nothing to stash")
backup_branch_name = 'backup_branch_{}'.format(datetime.today().strftime('%Y-%m-%d_%H_%M_%S'))
print("creating backup branch: {}".format(backup_branch_name))
repo.branches.local.create(backup_branch_name, repo.head.peel())
try:
repo.branches.local.create(backup_branch_name, repo.head.peel())
except:
pass
print("checking out master branch")
branch = repo.lookup_branch('master')
@@ -63,3 +70,41 @@ pull(repo)
print("Done!")
self_update = True
if len(sys.argv) > 2:
self_update = '--skip_self_update' not in sys.argv
update_py_path = os.path.realpath(__file__)
repo_update_py_path = os.path.join(repo_path, ".ci/update_windows/update.py")
cur_path = os.path.dirname(update_py_path)
req_path = os.path.join(cur_path, "current_requirements.txt")
repo_req_path = os.path.join(repo_path, "requirements.txt")
def files_equal(file1, file2):
try:
return filecmp.cmp(file1, file2, shallow=False)
except:
return False
def file_size(f):
try:
return os.path.getsize(f)
except:
return 0
if self_update and not files_equal(update_py_path, repo_update_py_path) and file_size(repo_update_py_path) > 10:
shutil.copy(repo_update_py_path, os.path.join(cur_path, "update_new.py"))
exit()
if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
import subprocess
try:
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', repo_req_path])
shutil.copy(repo_req_path, req_path)
except:
pass
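
A minimal sketch of the self-update gate added above, with hypothetical paths: the updater copies the repository's newer update.py out as update_new.py and exits, and the wrapper batch file swaps it in and re-runs with --skip_self_update.

import filecmp, os, shutil, sys

def needs_self_update(current_path, candidate_path):
    # update only when the repo copy differs and is non-trivially sized
    try:
        same = filecmp.cmp(current_path, candidate_path, shallow=False)
    except OSError:
        same = False
    try:
        size = os.path.getsize(candidate_path)
    except OSError:
        size = 0
    return not same and size > 10

# hypothetical paths, for illustration only
# if needs_self_update(__file__, "ComfyUI/.ci/update_windows/update.py"):
#     shutil.copy("ComfyUI/.ci/update_windows/update.py", "update_new.py")
#     sys.exit()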

View File

@@ -1,2 +1,8 @@
@echo off
..\python_embeded\python.exe .\update.py ..\ComfyUI\
pause
if exist update_new.py (
move /y update_new.py update.py
echo Running updater again since it got updated.
..\python_embeded\python.exe .\update.py ..\ComfyUI\ --skip_self_update
)
if "%~1"=="" pause

View File

@@ -1,3 +0,0 @@
..\python_embeded\python.exe .\update.py ..\ComfyUI\
..\python_embeded\python.exe -s -m pip install --upgrade torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 xformers -r ../ComfyUI/requirements.txt pygit2
pause

View File

@@ -1,11 +0,0 @@
@echo off
..\python_embeded\python.exe .\update.py ..\ComfyUI\
echo
echo This will try to update pytorch and all python dependencies, if you get an error wait for pytorch/xformers to fix their stuff
echo You should not be running this anyways unless you really have to
echo
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo
pause
..\python_embeded\python.exe -s -m pip install --upgrade torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 xformers -r ../ComfyUI/requirements.txt pygit2
pause

View File

@@ -1,71 +0,0 @@
name: "Windows Release cu118 dependencies"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
build_dependencies:
env:
# you need at least cuda 5.0 for some of the stuff compiled here.
TORCH_CUDA_ARCH_LIST: "5.0+PTX 6.0 6.1 7.0 7.5 8.0 8.6 8.9"
FORCE_CUDA: 1
MAX_JOBS: 1 # will crash otherwise
DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
XFORMERS_BUILD_TYPE: "Release"
runs-on: windows-latest
steps:
- name: Cache Built Dependencies
uses: actions/cache@v3
id: cache-cu118_python_stuff
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
uses: actions/checkout@v3
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
uses: actions/setup-python@v4
with:
python-version: '3.10.9'
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
uses: comfyanonymous/cuda-toolkit@test
id: cuda-toolkit
with:
cuda: '11.8.0'
# copied from xformers github
- name: Setup MSVC
uses: ilammy/msvc-dev-cmd@v1
- name: Configure Pagefile
# windows runners will OOM with many CUDA architectures
# we cheat here with a page file
uses: al-cheb/configure-pagefile-action@v1.3
with:
minimum-size: 2GB
# really unfortunate: https://github.com/ilammy/msvc-dev-cmd#name-conflicts-with-shell-bash
- name: Remove link.exe
shell: bash
run: rm /usr/bin/link
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
shell: bash
run: |
python -m pip wheel --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
git clone --recurse-submodules https://github.com/facebookresearch/xformers.git
cd xformers
python -m pip install --no-cache-dir wheel setuptools twine
echo building xformers
python setup.py bdist_wheel -d ../temp_wheel_dir/
cd ..
rm -rf xformers
ls -lah temp_wheel_dir
mv temp_wheel_dir cu118_python_deps
tar cf cu118_python_deps.tar cu118_python_deps

View File

@@ -1,37 +0,0 @@
name: "Windows Release cu118 dependencies 2"
on:
workflow_dispatch:
inputs:
xformers:
description: 'xformers version'
required: true
type: string
default: "xformers"
# push:
# branches:
# - master
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10.9'
- shell: bash
run: |
python -m pip wheel --no-cache-dir torch torchvision torchaudio ${{ inputs.xformers }} --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir cu118_python_deps
tar cf cu118_python_deps.tar cu118_python_deps
- uses: actions/cache/save@v3
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118

View File

@@ -1,79 +0,0 @@
name: "Windows Release cu118 packaging"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
package_comfyui:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
runs-on: windows-latest
steps:
- uses: actions/cache/restore@v3
id: cache
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118
- shell: bash
run: |
mv cu118_python_deps.tar ../
cd ..
tar xf cu118_python_deps.tar
pwd
ls
- uses: actions/checkout@v3
with:
fetch-depth: 0
persist-credentials: false
- shell: bash
run: |
cd ..
cp -r ComfyUI ComfyUI_copy
curl https://www.python.org/ftp/python/3.10.9/python-3.10.9-embed-amd64.zip -o python_embeded.zip
unzip python_embeded.zip -d python_embeded
cd python_embeded
echo 'import site' >> ./python310._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
./python.exe -s -m pip install ../cu118_python_deps/*
sed -i '1i../ComfyUI' ./python310._pth
cd ..
git clone https://github.com/comfyanonymous/taesd
cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable
mv python_embeded ComfyUI_windows_portable
mv ComfyUI_copy ComfyUI_windows_portable/ComfyUI
cd ComfyUI_windows_portable
mkdir update
cp -r ComfyUI/.ci/update_windows/* ./update/
cp -r ComfyUI/.ci/update_windows_cu118/* ./update/
cp -r ComfyUI/.ci/windows_base_files/* ./
cd ..
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable
mv ComfyUI_windows_portable.7z ComfyUI/new_ComfyUI_windows_portable_nvidia_cu118_or_cpu.7z
cd ComfyUI_windows_portable
python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu
ls
- name: Upload binaries to release
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: new_ComfyUI_windows_portable_nvidia_cu118_or_cpu.7z
tag: "latest"
overwrite: true

View File

@@ -41,10 +41,9 @@ jobs:
- shell: bash
run: |
echo "@echo off
..\python_embeded\python.exe .\update.py ..\ComfyUI\\
call update_comfyui.bat nopause
echo -
echo This will try to update pytorch and all python dependencies, if you get an error wait for pytorch/xformers to fix their stuff
echo You should not be running this anyways unless you really have to
echo This will try to update pytorch and all python dependencies.
echo -
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo -

View File

@@ -68,7 +68,7 @@ jobs:
cp -r ComfyUI/.ci/update_windows/* ./update/
cp -r ComfyUI/.ci/windows_base_files/* ./
echo "..\python_embeded\python.exe .\update.py ..\ComfyUI\\
echo "call update_comfyui.bat nopause
..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2
pause" > ./update/update_comfyui_and_python_dependencies.bat
cd ..

View File

@@ -119,6 +119,9 @@ class CLIPTextModel(torch.nn.Module):
super().__init__()
self.num_layers = config_dict["num_hidden_layers"]
self.text_model = CLIPTextModel_(config_dict, dtype, device, operations)
embed_dim = config_dict["hidden_size"]
self.text_projection = operations.Linear(embed_dim, embed_dim, bias=False, dtype=dtype, device=device)
self.text_projection.weight.copy_(torch.eye(embed_dim))
self.dtype = dtype
def get_input_embeddings(self):
@@ -128,7 +131,10 @@ class CLIPTextModel(torch.nn.Module):
self.text_model.embeddings.token_embedding = embeddings
def forward(self, *args, **kwargs):
return self.text_model(*args, **kwargs)
x = self.text_model(*args, **kwargs)
out = self.text_projection(x[2])
return (x[0], x[1], out, x[2])
class CLIPVisionEmbeddings(torch.nn.Module):
def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, dtype=None, device=None, operations=None):
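
The projection added above is initialized to the identity, so checkpoints that carry no text_projection weight keep their previous pooled output. A small standalone check, with plain torch.nn.Linear standing in for operations.Linear (that substitution is an assumption here):

import torch

embed_dim = 8
text_projection = torch.nn.Linear(embed_dim, embed_dim, bias=False)
with torch.no_grad():
    text_projection.weight.copy_(torch.eye(embed_dim))

pooled = torch.randn(2, embed_dim)
# identity weights leave the pooled output unchanged
assert torch.allclose(text_projection(pooled), pooled)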

View File

@@ -712,11 +712,11 @@ class PromptServer(ExecutorToClientProgress):
for name, dir in nodes.EXTENSION_WEB_DIRS.items():
self.app.add_routes([
web.static('/extensions/' + quote(name), dir, follow_symlinks=True),
web.static('/extensions/' + quote(name), dir),
])
self.app.add_routes([
web.static('/', self.web_root, follow_symlinks=True),
web.static('/', self.web_root),
])
def get_queue_info(self):
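
With follow_symlinks removed, aiohttp falls back to its default of not following symbolic links, so files reached through a symlink under a static root are no longer served. A minimal sketch of the hardened route setup; the paths are placeholders:

from aiohttp import web

app = web.Application()
# follow_symlinks defaults to False, so symlinked paths are not resolved
app.add_routes([web.static('/', './web')])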

View File

@@ -358,9 +358,6 @@ class UniPC:
thresholding=False,
max_val=1.,
variant='bh1',
noise_mask=None,
masked_image=None,
noise=None,
):
"""Construct a UniPC.
@@ -372,9 +369,6 @@
self.predict_x0 = predict_x0
self.thresholding = thresholding
self.max_val = max_val
self.noise_mask = noise_mask
self.masked_image = masked_image
self.noise = noise
def dynamic_thresholding_fn(self, x0, t=None):
"""
@@ -391,10 +385,7 @@
"""
Return the noise prediction model.
"""
if self.noise_mask is not None:
return self.model(x, t) * self.noise_mask
else:
return self.model(x, t)
return self.model(x, t)
def data_prediction_fn(self, x, t):
"""
@@ -409,8 +400,6 @@
s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1)
s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims)
x0 = torch.clamp(x0, -s, s) / s
if self.noise_mask is not None:
x0 = x0 * self.noise_mask + (1. - self.noise_mask) * self.masked_image
return x0
def model_fn(self, x, t):
@@ -723,8 +712,6 @@ class UniPC:
assert timesteps.shape[0] - 1 == steps
# with torch.no_grad():
for step_index in trange(steps, disable=disable_pbar):
if self.noise_mask is not None:
x = x * self.noise_mask + (1. - self.noise_mask) * (self.masked_image * self.noise_schedule.marginal_alpha(timesteps[step_index]) + self.noise * self.noise_schedule.marginal_std(timesteps[step_index]))
if step_index == 0:
vec_t = timesteps[0].expand((x.shape[0]))
model_prev_list = [self.model_fn(x, vec_t)]
@@ -766,7 +753,7 @@ class UniPC:
model_x = self.model_fn(x, vec_t)
model_prev_list[-1] = model_x
if callback is not None:
callback(step_index, model_prev_list[-1], x, steps)
callback({'x': x, 'i': step_index, 'denoised': model_prev_list[-1]})
else:
raise NotImplementedError()
# if denoise_to_zero:
@@ -858,7 +845,7 @@ def predict_eps_sigma(model, input, sigma_in, **kwargs):
return (input - model(input, sigma_in, **kwargs)) / sigma
def sample_unipc(model, noise, image, sigmas, max_denoise, extra_args=None, callback=None, disable=False, noise_mask=None, variant='bh1'):
def sample_unipc(model, noise, sigmas, extra_args=None, callback=None, disable=False, variant='bh1'):
timesteps = sigmas.clone()
if sigmas[-1] == 0:
timesteps = sigmas[:]
@@ -867,16 +854,7 @@ def sample_unipc(model, noise, image, sigmas, max_denoise, extra_args=None, call
timesteps = sigmas.clone()
ns = SigmaConvert()
if image is not None:
img = image * ns.marginal_alpha(timesteps[0])
if max_denoise:
noise_mult = 1.0
else:
noise_mult = ns.marginal_std(timesteps[0])
img += noise * noise_mult
else:
img = noise
noise = noise / torch.sqrt(1.0 + timesteps[0] ** 2.0)
model_type = "noise"
model_fn = model_wrapper(
@@ -888,7 +866,10 @@ def sample_unipc(model, noise, image, sigmas, max_denoise, extra_args=None, call
)
order = min(3, len(timesteps) - 2)
uni_pc = UniPC(model_fn, ns, predict_x0=True, thresholding=False, noise_mask=noise_mask, masked_image=image, noise=noise, variant=variant)
x = uni_pc.sample(img, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True, callback=callback, disable_pbar=disable)
uni_pc = UniPC(model_fn, ns, predict_x0=True, thresholding=False, variant=variant)
x = uni_pc.sample(noise, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True, callback=callback, disable_pbar=disable)
x /= ns.marginal_alpha(timesteps[-1])
return x
def sample_unipc_bh2(model, noise, sigmas, extra_args=None, callback=None, disable=False):
return sample_unipc(model, noise, sigmas, extra_args, callback, disable, variant='bh2')
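
sample_unipc now takes the k-diffusion style arguments (model, noise, sigmas, extra_args, callback, disable) and reports progress through a dict rather than positional values. A hedged usage sketch; model, noise and sigmas are assumed to come from the usual sampling setup:

def on_step(info):
    # keys provided per step: 'x' (current latent), 'i' (step index), 'denoised'
    print("step", info['i'], "denoised std", float(info['denoised'].std()))

# samples = sample_unipc(model, noise, sigmas, extra_args={}, callback=on_step, disable=False)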

View File

@@ -197,6 +197,15 @@ def model_lora_keys_clip(model, key_map={}):
key_map[lora_key] = k
lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
key_map[lora_key] = k
lora_key = "lora_prior_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #cascade lora: TODO put lora key prefix in the model config
key_map[lora_key] = k
k = "clip_g.transformer.text_projection.weight"
if k in sdk:
key_map["lora_prior_te_text_projection"] = k #cascade lora?
# key_map["text_encoder.text_projection"] = k #TODO: check if other lora have the text_projection too
# key_map["lora_te_text_projection"] = k
return key_map
@@ -207,6 +216,7 @@ def model_lora_keys_unet(model, key_map={}):
if k.startswith("diffusion_model.") and k.endswith(".weight"):
key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
key_map["lora_unet_{}".format(key_lora)] = k
key_map["lora_prior_unet_{}".format(key_lora)] = k #cascade lora: TODO put lora key prefix in the model config
diffusers_keys = utils.unet_to_diffusers(model.model_config.unet_config)
for k in diffusers_keys:

View File

@@ -987,7 +987,7 @@ class GLIGENTextBoxApply:
def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y):
c = []
cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True)
cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled="unprojected")
for t in conditioning_to:
n = [t[0], t[1].copy()]
position_params = [(cond_pooled, height // 8, width // 8, y // 8, x // 8)]

View File

@@ -211,6 +211,7 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
cur_patches[p] = cur_patches[p] + patches[p]
else:
cur_patches[p] = patches[p]
transformer_options["patches"] = cur_patches
else:
transformer_options["patches"] = patches
@@ -516,14 +517,9 @@ class Sampler:
sigma = float(sigmas[0])
return math.isclose(max_sigma, sigma, rel_tol=1e-05) or sigma > max_sigma
class UNIPC(Sampler):
def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
return uni_pc.sample_unipc(model_wrap, noise, latent_image, sigmas, max_denoise=self.max_denoise(model_wrap, sigmas), extra_args=extra_args, noise_mask=denoise_mask, callback=callback, disable=disable_pbar)
class UNIPCBH2(Sampler):
def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
return uni_pc.sample_unipc(model_wrap, noise, latent_image, sigmas, max_denoise=self.max_denoise(model_wrap, sigmas), extra_args=extra_args, noise_mask=denoise_mask, callback=callback, variant='bh2', disable=disable_pbar)
KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "heunpp2","dpm_2", "dpm_2_ancestral",
"lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
"dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm"]
class KSAMPLER(Sampler):
def __init__(self, sampler_function, extra_options={}, inpaint_options={}):
@@ -638,9 +634,9 @@ def calculate_sigmas_scheduler(model, scheduler_name, steps):
def sampler_object(name):
if name == "uni_pc":
sampler = UNIPC()
sampler = KSAMPLER(uni_pc.sample_unipc)
elif name == "uni_pc_bh2":
sampler = UNIPCBH2()
sampler = KSAMPLER(uni_pc.sample_unipc_bh2)
elif name == "ddim":
sampler = ksampler("euler", inpaint_options={"random": True})
else:
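
Because sample_unipc now matches the KSAMPLER sampler_function signature, the dedicated UNIPC and UNIPCBH2 classes are no longer needed. A hedged sketch of how the sampler is obtained and invoked, with the arguments assumed from the existing Sampler.sample interface:

from comfy.samplers import sampler_object

sampler = sampler_object("uni_pc")  # a KSAMPLER wrapping uni_pc.sample_unipc
# samples = sampler.sample(model_wrap, sigmas, extra_args, callback, noise,
#                          latent_image=latent_image, denoise_mask=None, disable_pbar=False)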

View File

@@ -52,7 +52,7 @@ def load_clip_weights(model, sd):
if ids.dtype == torch.float32:
sd['cond_stage_model.transformer.text_model.embeddings.position_ids'] = ids.round()
sd = utils.transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.text_model.", 24)
sd = utils.clip_text_transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.")
return load_model_weights(model, sd)
@@ -123,10 +123,13 @@ class CLIP:
return self.tokenizer.tokenize_with_weights(text, return_word_ids)
def encode_from_tokens(self, tokens, return_pooled=False):
self.cond_stage_model.reset_clip_options()
if self.layer_idx is not None:
self.cond_stage_model.clip_layer(self.layer_idx)
else:
self.cond_stage_model.reset_clip_layer()
self.cond_stage_model.set_clip_options({"layer": self.layer_idx})
if return_pooled == "unprojected":
self.cond_stage_model.set_clip_options({"projected_pooled": False})
self.load_model()
cond, pooled = self.cond_stage_model.encode_token_weights(tokens)
@@ -361,7 +364,10 @@ def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DI
for i in range(len(clip_data)):
if "transformer.resblocks.0.ln_1.weight" in clip_data[i]:
clip_data[i] = utils.transformers_convert(clip_data[i], "", "text_model.", 32)
clip_data[i] = utils.clip_text_transformers_convert(clip_data[i], "", "")
else:
if "text_projection" in clip_data[i]:
clip_data[i]["text_projection.weight"] = clip_data[i]["text_projection"].transpose(0, 1) #old models saved with the CLIPSave node
clip_target = EmptyClass()
clip_target.params = {}
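
Passing return_pooled="unprojected" turns off the projected pooled output for that call only, which is what the GLIGEN node change above relies on. A hedged usage sketch assuming a loaded CLIP object:

tokens = clip.tokenize("a photograph of a cat")
cond, pooled = clip.encode_from_tokens(tokens, return_pooled="unprojected")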

View File

@@ -68,7 +68,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
]
def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77,
freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=clip_model.CLIPTextModel,
special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False): # clip-vit-base-patch32
special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, return_projected_pooled=True): # clip-vit-base-patch32
super().__init__()
assert layer in self.LAYERS
@@ -89,16 +89,18 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
self.layer = layer
self.layer_idx = None
self.special_tokens = special_tokens
self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1]))
self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055))
self.enable_attention_masks = enable_attention_masks
self.layer_norm_hidden_state = layer_norm_hidden_state
self.return_projected_pooled = return_projected_pooled
if layer == "hidden":
assert layer_idx is not None
assert abs(layer_idx) < self.num_layers
self.clip_layer(layer_idx)
self.layer_default = (self.layer, self.layer_idx)
self.set_clip_options({"layer": layer_idx})
self.options_default = (self.layer, self.layer_idx, self.return_projected_pooled)
def freeze(self):
self.transformer = self.transformer.eval()
@@ -106,16 +108,19 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
for param in self.parameters():
param.requires_grad = False
def clip_layer(self, layer_idx):
if abs(layer_idx) > self.num_layers:
def set_clip_options(self, options):
layer_idx = options.get("layer", self.layer_idx)
self.return_projected_pooled = options.get("projected_pooled", self.return_projected_pooled)
if layer_idx is None or abs(layer_idx) > self.num_layers:
self.layer = "last"
else:
self.layer = "hidden"
self.layer_idx = layer_idx
def reset_clip_layer(self):
self.layer = self.layer_default[0]
self.layer_idx = self.layer_default[1]
def reset_clip_options(self):
self.layer = self.options_default[0]
self.layer_idx = self.options_default[1]
self.return_projected_pooled = self.options_default[2]
def set_up_textual_embeddings(self, tokens, current_embeds):
out_tokens = []
@@ -180,23 +185,19 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
else:
z = outputs[1]
if outputs[2] is not None:
pooled_output = outputs[2].float()
else:
pooled_output = None
pooled_output = None
if len(outputs) >= 3:
if not self.return_projected_pooled and len(outputs) >= 4 and outputs[3] is not None:
pooled_output = outputs[3].float()
elif outputs[2] is not None:
pooled_output = outputs[2].float()
if self.text_projection is not None and pooled_output is not None:
pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float()
return z.float(), pooled_output
def encode(self, tokens):
return self(tokens)
def load_sd(self, sd):
if "text_projection" in sd:
self.text_projection[:] = sd.pop("text_projection")
if "text_projection.weight" in sd:
self.text_projection[:] = sd.pop("text_projection.weight").transpose(0, 1)
return self.transformer.load_state_dict(sd, strict=False)
def parse_parentheses(string):
@@ -515,11 +516,11 @@ class SD1ClipModel(torch.nn.Module):
self.clip = "clip_{}".format(self.clip_name)
setattr(self, self.clip, clip_model(device=device, dtype=dtype, **kwargs))
def clip_layer(self, layer_idx):
getattr(self, self.clip).clip_layer(layer_idx)
def set_clip_options(self, options):
getattr(self, self.clip).set_clip_options(options)
def reset_clip_layer(self):
getattr(self, self.clip).reset_clip_layer()
def reset_clip_options(self):
getattr(self, self.clip).reset_clip_options()
def encode_token_weights(self, token_weight_pairs):
token_weight_pairs = token_weight_pairs[self.clip_name]
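
set_clip_options and reset_clip_options replace the old clip_layer/reset_clip_layer pair and also carry the projected_pooled switch. A minimal sketch of the new call pattern, with clip_model assumed to be an SDClipModel instance:

clip_model.set_clip_options({"layer": -2})                # use the penultimate hidden layer
clip_model.set_clip_options({"projected_pooled": False})  # return the unprojected pooled output
# ... encode_token_weights(...) ...
clip_model.reset_clip_options()                           # restore the defaults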

View File

@@ -40,13 +40,13 @@ class SDXLClipModel(torch.nn.Module):
self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False)
self.clip_g = SDXLClipG(device=device, dtype=dtype)
def clip_layer(self, layer_idx):
self.clip_l.clip_layer(layer_idx)
self.clip_g.clip_layer(layer_idx)
def set_clip_options(self, options):
self.clip_l.set_clip_options(options)
self.clip_g.set_clip_options(options)
def reset_clip_layer(self):
self.clip_g.reset_clip_layer()
self.clip_l.reset_clip_layer()
def reset_clip_options(self):
self.clip_g.reset_clip_options()
self.clip_l.reset_clip_options()
def encode_token_weights(self, token_weight_pairs):
token_weight_pairs_g = token_weight_pairs["g"]

View File

@@ -75,7 +75,7 @@ class SD20(supported_models_base.BASE):
replace_prefix["conditioner.embedders.0.model."] = "clip_h." #SD2 in sgm format
replace_prefix["cond_stage_model.model."] = "clip_h."
state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix, filter_keys=True)
state_dict = utils.transformers_convert(state_dict, "clip_h.", "clip_h.transformer.text_model.", 24)
state_dict = utils.clip_text_transformers_convert(state_dict, "clip_h.", "clip_h.transformer.")
return state_dict
def process_clip_state_dict_for_saving(self, state_dict):
@@ -134,7 +134,7 @@ class SDXLRefiner(supported_models_base.BASE):
replace_prefix["conditioner.embedders.0.model."] = "clip_g."
state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix, filter_keys=True)
state_dict = utils.transformers_convert(state_dict, "clip_g.", "clip_g.transformer.text_model.", 32)
state_dict = utils.clip_text_transformers_convert(state_dict, "clip_g.", "clip_g.transformer.")
state_dict = utils.state_dict_key_replace(state_dict, keys_to_replace)
return state_dict
@@ -182,10 +182,8 @@ class SDXL(supported_models_base.BASE):
replace_prefix["conditioner.embedders.1.model."] = "clip_g."
state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix, filter_keys=True)
state_dict = utils.transformers_convert(state_dict, "clip_g.", "clip_g.transformer.text_model.", 32)
keys_to_replace["clip_g.text_projection.weight"] = "clip_g.text_projection"
state_dict = utils.state_dict_key_replace(state_dict, keys_to_replace)
state_dict = utils.clip_text_transformers_convert(state_dict, "clip_g.", "clip_g.transformer.")
return state_dict
def process_clip_state_dict_for_saving(self, state_dict):
@@ -338,6 +336,12 @@ class Stable_Cascade_C(supported_models_base.BASE):
state_dict[k_to] = weights[shape_from*x:shape_from*(x + 1)]
return state_dict
def process_clip_state_dict(self, state_dict):
state_dict = utils.state_dict_prefix_replace(state_dict, {k: "" for k in self.text_encoder_key_prefix}, filter_keys=True)
if "clip_g.text_projection" in state_dict:
state_dict["clip_g.transformer.text_projection.weight"] = state_dict.pop("clip_g.text_projection").transpose(0, 1)
return state_dict
def get_model(self, state_dict, prefix="", device=None):
out = model_base.StableCascade_C(self, device=device)
return out

View File

@@ -100,8 +100,22 @@ def transformers_convert(sd, prefix_from, prefix_to, number):
p = ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"]
k_to = "{}encoder.layers.{}.{}.{}".format(prefix_to, resblock, p[x], y)
sd[k_to] = weights[shape_from*x:shape_from*(x + 1)]
return sd
def clip_text_transformers_convert(sd, prefix_from, prefix_to):
sd = transformers_convert(sd, prefix_from, "{}text_model.".format(prefix_to), 32)
tp = "{}text_projection.weight".format(prefix_from)
if tp in sd:
sd["{}text_projection.weight".format(prefix_to)] = sd.pop(tp)
tp = "{}text_projection".format(prefix_from)
if tp in sd:
sd["{}text_projection.weight".format(prefix_to)] = sd.pop(tp).transpose(0, 1)
return sd
UNET_MAP_ATTENTIONS = {
"proj_in.weight",
"proj_in.bias",

View File

@@ -33,7 +33,7 @@ class PerpNeg:
pos = noise_pred_pos - noise_pred_nocond
neg = noise_pred_neg - noise_pred_nocond
perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg
perp = neg - ((torch.mul(neg, pos).sum())/(torch.norm(pos)**2)) * pos
perp_neg = perp * neg_scale
cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg)
cfg_result = x - cfg_result
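
The corrected line computes the component of neg orthogonal to pos (the rejection of neg from pos) rather than the projection of pos onto neg. A small standalone check with random vectors:

import torch

pos = torch.randn(16)
neg = torch.randn(16)
perp = neg - (torch.dot(neg, pos) / torch.norm(pos) ** 2) * pos
# perp is orthogonal to pos
assert abs(float(torch.dot(perp, pos))) < 1e-4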

View File

@@ -103,6 +103,9 @@ class Example:
#def IS_CHANGED(s, image, string_field, int_field, float_field, print_to_screen):
# return ""
# Set the web directory, any .js file in that directory will be loaded by the frontend as a frontend extension
# WEB_DIRECTORY = "./somejs"
# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {