diff --git a/.github/workflows/windows_release_cu118_dependencies_2.yml b/.github/workflows/windows_release_cu118_dependencies_2.yml
index 42adee9e7..a7760b21e 100644
--- a/.github/workflows/windows_release_cu118_dependencies_2.yml
+++ b/.github/workflows/windows_release_cu118_dependencies_2.yml
@@ -2,6 +2,13 @@ name: "Windows Release cu118 dependencies 2"
 
 on:
   workflow_dispatch:
+    inputs:
+      xformers:
+        description: 'xformers version'
+        required: true
+        type: string
+        default: "xformers"
+
 #  push:
 #    branches:
 #      - master
@@ -17,7 +24,7 @@ jobs:
 
     - shell: bash
       run: |
-        python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
+        python -m pip wheel --no-cache-dir torch torchvision torchaudio ${{ inputs.xformers }} --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
         python -m pip install --no-cache-dir ./temp_wheel_dir/*
         echo installed basic
         ls -lah temp_wheel_dir
diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 000000000..7c7c3e19e
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1 @@
+* @comfyanonymous
diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py
index e2bc3209d..8d04faf71 100644
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@@ -24,8 +24,8 @@ class ClipVisionModel():
         return self.model.load_state_dict(sd, strict=False)
 
     def encode_image(self, image):
-        img = torch.clip((255. * image[0]), 0, 255).round().int()
-        inputs = self.processor(images=[img], return_tensors="pt")
+        img = torch.clip((255. * image), 0, 255).round().int()
+        inputs = self.processor(images=img, return_tensors="pt")
         outputs = self.model(**inputs)
         return outputs
 
diff --git a/comfy/model_base.py b/comfy/model_base.py
index bf6983fc2..c3c807a68 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -120,15 +120,15 @@ class SD21UNCLIP(BaseModel):
             weights = []
             noise_aug = []
             for unclip_cond in unclip_conditioning:
-                adm_cond = unclip_cond["clip_vision_output"].image_embeds
-                weight = unclip_cond["strength"]
-                noise_augment = unclip_cond["noise_augmentation"]
-                noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
-                c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
-                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
-                weights.append(weight)
-                noise_aug.append(noise_augment)
-                adm_inputs.append(adm_out)
+                for adm_cond in unclip_cond["clip_vision_output"].image_embeds:
+                    weight = unclip_cond["strength"]
+                    noise_augment = unclip_cond["noise_augmentation"]
+                    noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
+                    c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
+                    adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
+                    weights.append(weight)
+                    noise_aug.append(noise_augment)
+                    adm_inputs.append(adm_out)
 
             if len(noise_aug) > 1:
                 adm_out = torch.stack(adm_inputs).sum(0)
diff --git a/nodes.py b/nodes.py
index 92baffe30..5f7ea95c0 100644
--- a/nodes.py
+++ b/nodes.py
@@ -771,7 +771,7 @@ class StyleModelApply:
     CATEGORY = "conditioning/style_model"
 
     def apply_stylemodel(self, clip_vision_output, style_model, conditioning):
-        cond = style_model.get_cond(clip_vision_output)
+        cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
         c = []
         for t in conditioning:
             n = [torch.cat((t[0], cond), dim=1), t[1].copy()]