diff --git a/comfy/model_base.py b/comfy/model_base.py index e4c9391db..60997246c 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -156,10 +156,10 @@ class SDXLRefiner(BaseModel): print(clip_pooled.shape, width, height, crop_w, crop_h, aesthetic_score) out = [] - out.append(self.embedder(torch.Tensor([width]))) out.append(self.embedder(torch.Tensor([height]))) - out.append(self.embedder(torch.Tensor([crop_w]))) + out.append(self.embedder(torch.Tensor([width]))) out.append(self.embedder(torch.Tensor([crop_h]))) + out.append(self.embedder(torch.Tensor([crop_w]))) out.append(self.embedder(torch.Tensor([aesthetic_score]))) flat = torch.flatten(torch.cat(out))[None, ] return torch.cat((clip_pooled.to(flat.device), flat), dim=1) @@ -180,11 +180,11 @@ class SDXL(BaseModel): print(clip_pooled.shape, width, height, crop_w, crop_h, target_width, target_height) out = [] - out.append(self.embedder(torch.Tensor([width]))) out.append(self.embedder(torch.Tensor([height]))) - out.append(self.embedder(torch.Tensor([crop_w]))) + out.append(self.embedder(torch.Tensor([width]))) out.append(self.embedder(torch.Tensor([crop_h]))) - out.append(self.embedder(torch.Tensor([target_width]))) + out.append(self.embedder(torch.Tensor([crop_w]))) out.append(self.embedder(torch.Tensor([target_height]))) + out.append(self.embedder(torch.Tensor([target_width]))) flat = torch.flatten(torch.cat(out))[None, ] return torch.cat((clip_pooled.to(flat.device), flat), dim=1) diff --git a/comfy/sd.py b/comfy/sd.py index 21d7b8a54..52d016b10 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -223,13 +223,28 @@ def model_lora_keys(model, key_map={}): counter += 1 counter = 0 text_model_lora_key = "lora_te_text_model_encoder_layers_{}_{}" - for b in range(24): + clip_l_present = False + for b in range(32): for c in LORA_CLIP_MAP: k = "transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) if k in sdk: lora_key = text_model_lora_key.format(b, LORA_CLIP_MAP[c]) key_map[lora_key] = k + k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) + if k in sdk: + lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base + key_map[lora_key] = k + clip_l_present = True + + k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) + if k in sdk: + if clip_l_present: + lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base + else: + lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner + key_map[lora_key] = k + #Locon stuff ds_counter = 0 diff --git a/nodes.py b/nodes.py index e6317b187..f99b41eda 100644 --- a/nodes.py +++ b/nodes.py @@ -151,6 +151,25 @@ class ConditioningSetMask: c.append(n) return (c, ) +class ConditioningZeroOut: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", )}} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "zero_out" + + CATEGORY = "advanced/conditioning" + + def zero_out(self, conditioning): + c = [] + for t in conditioning: + d = t[1].copy() + if "pooled_output" in d: + d["pooled_output"] = torch.zeros_like(d["pooled_output"]) + n = [torch.zeros_like(t[0]), d] + c.append(n) + return (c, ) + class VAEDecode: @classmethod def INPUT_TYPES(s): @@ -1473,6 +1492,7 @@ NODE_CLASS_MAPPINGS = { "LoadLatent": LoadLatent, "SaveLatent": SaveLatent, + "ConditioningZeroOut": ConditioningZeroOut, "SavePreviewLatent": SavePreviewLatent, }