Merge branch 'master' into dr-support-pip-cm

Dr.Lt.Data 2025-09-03 00:07:37 +09:00
commit cc21e84115
6 changed files with 36 additions and 3 deletions

View File

@@ -149,6 +149,7 @@ class PerformanceFeature(enum.Enum):
     Fp16Accumulation = "fp16_accumulation"
     Fp8MatrixMultiplication = "fp8_matrix_mult"
     CublasOps = "cublas_ops"
+    AutoTune = "autotune"
 
 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list of specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult cublas_ops")
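The new enum member makes "autotune" a valid value for --fast. A minimal standalone sketch of how an enum-typed nargs="*" flag like this parses (a toy parser re-declared here, not ComfyUI's actual one):

import argparse
import enum

class PerformanceFeature(enum.Enum):
    Fp16Accumulation = "fp16_accumulation"
    Fp8MatrixMultiplication = "fp8_matrix_mult"
    CublasOps = "cublas_ops"
    AutoTune = "autotune"

parser = argparse.ArgumentParser()
parser.add_argument("--fast", nargs="*", type=PerformanceFeature)

# each token is converted through the enum constructor, so typos fail fast
print(parser.parse_args(["--fast", "autotune", "cublas_ops"]).fast)
# [<PerformanceFeature.AutoTune: 'autotune'>, <PerformanceFeature.CublasOps: 'cublas_ops'>]

# bare --fast yields an empty list, which the help text says enables everything
print(parser.parse_args(["--fast"]).fast)
# []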

View File

@@ -233,12 +233,18 @@ class Flux(nn.Module):
             h = 0
             w = 0
             index = 0
-            index_ref_method = kwargs.get("ref_latents_method", "offset") == "index"
+            ref_latents_method = kwargs.get("ref_latents_method", "offset")
             for ref in ref_latents:
-                if index_ref_method:
+                if ref_latents_method == "index":
                     index += 1
                     h_offset = 0
                     w_offset = 0
+                elif ref_latents_method == "uso":
+                    index = 0
+                    h_offset = h_len * patch_size + h
+                    w_offset = w_len * patch_size + w
+                    h += ref.shape[-2]
+                    w += ref.shape[-1]
                 else:
                     index = 1
                     h_offset = 0
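A simplified sketch of what the three ref_latents_method branches compute for each reference latent's (index, h_offset, w_offset) placement; the names mirror the diff, but the surrounding model code is omitted and the helper is illustrative only:

def ref_latent_placement(method, ref_shapes, h_len, w_len, patch_size):
    # illustrative helper mirroring the diff's branches, not the model code
    placements = []
    h = w = 0
    index = 0
    for ref_h, ref_w in ref_shapes:
        if method == "index":
            index += 1            # each reference gets its own index, no spatial shift
            h_offset = w_offset = 0
        elif method == "uso":
            index = 0             # all references share index 0 and are tiled
            h_offset = h_len * patch_size + h   # diagonally past the main image grid
            w_offset = w_len * patch_size + w
            h += ref_h
            w += ref_w
        else:                     # "offset" (the default)
            index = 1             # every reference shares index 1 at the origin
            h_offset = w_offset = 0
        placements.append((index, h_offset, w_offset))
    return placements

# e.g. two 512x512 reference latents against a 64x64 token grid, patch_size 2:
print(ref_latent_placement("uso", [(512, 512), (512, 512)], 64, 64, 2))
# [(0, 128, 128), (0, 640, 640)]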

View File

@@ -260,6 +260,10 @@ def model_lora_keys_unet(model, key_map={}):
                 key_map["transformer.{}".format(k[:-len(".weight")])] = to #simpletrainer and probably regular diffusers flux lora format
                 key_map["lycoris_{}".format(k[:-len(".weight")].replace(".", "_"))] = to #simpletrainer lycoris
                 key_map["lora_transformer_{}".format(k[:-len(".weight")].replace(".", "_"))] = to #onetrainer
 
+        for k in sdk:
+            hidden_size = model.model_config.unet_config.get("hidden_size", 0)
+            if k.endswith(".weight") and ".linear1." in k:
+                key_map["{}".format(k.replace(".linear1.weight", ".linear1_qkv"))] = (k, (0, 0, hidden_size * 3))
+
     if isinstance(model, comfy.model_base.GenmoMochi):
         for k in sdk:
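Flux single blocks fuse the qkv and mlp input projections into one linear1 weight, and the synthetic ".linear1_qkv" key maps a LoRA onto just the qkv rows. The (0, 0, hidden_size * 3) tuple is read here as a (dim, start, length) slice, which is an assumption about the loader's convention; the helper below is illustrative, not ComfyUI's code:

import torch

hidden_size = 3072                        # Flux's hidden size
mlp_hidden = 4 * hidden_size              # Flux uses a 4x mlp ratio
linear1_weight = torch.zeros(hidden_size * 3 + mlp_hidden, hidden_size)

def apply_sliced_lora(weight, delta, slice_spec):
    # hypothetical helper: patch only the slice a key_map entry points at
    dim, start, length = slice_spec
    weight.narrow(dim, start, length).add_(delta)
    return weight

delta = torch.randn(hidden_size * 3, hidden_size) * 1e-3
apply_sliced_lora(linear1_weight, delta, (0, 0, hidden_size * 3))
assert linear1_weight[hidden_size * 3:].abs().sum() == 0  # mlp rows untouched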

View File

@@ -15,10 +15,29 @@ def convert_lora_bfl_control(sd): #BFL loras for Flux
 
 def convert_lora_wan_fun(sd): #Wan Fun loras
     return comfy.utils.state_dict_prefix_replace(sd, {"lora_unet__": "lora_unet_"})
 
+def convert_uso_lora(sd):
+    sd_out = {}
+    for k in sd:
+        tensor = sd[k]
+        k_to = "diffusion_model.{}".format(k.replace(".down.weight", ".lora_down.weight")
+                                            .replace(".up.weight", ".lora_up.weight")
+                                            .replace(".qkv_lora2.", ".txt_attn.qkv.")
+                                            .replace(".qkv_lora1.", ".img_attn.qkv.")
+                                            .replace(".proj_lora1.", ".img_attn.proj.")
+                                            .replace(".proj_lora2.", ".txt_attn.proj.")
+                                            .replace(".qkv_lora.", ".linear1_qkv.")
+                                            .replace(".proj_lora.", ".linear2.")
+                                            .replace(".processor.", ".")
+                                            )
+        sd_out[k_to] = tensor
+    return sd_out
+
 
 def convert_lora(sd):
     if "img_in.lora_A.weight" in sd and "single_blocks.0.norm.key_norm.scale" in sd:
         return convert_lora_bfl_control(sd)
     if "lora_unet__blocks_0_cross_attn_k.lora_down.weight" in sd:
         return convert_lora_wan_fun(sd)
+    if "single_blocks.37.processor.qkv_lora.up.weight" in sd and "double_blocks.18.processor.qkv_lora2.up.weight" in sd:
+        return convert_uso_lora(sd)
     return sd
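A quick check of the key translation, assuming the repository is on the import path as comfy.lora_convert (the module this diff touches) and using dummy scalar values in place of real LoRA tensors:

from comfy.lora_convert import convert_lora  # module path assumed from this diff

sd = {
    "single_blocks.37.processor.qkv_lora.up.weight": 0,    # dummy value
    "double_blocks.18.processor.qkv_lora2.up.weight": 0,   # dummy value
}
# both marker keys are present, so convert_uso_lora handles the dict
print(list(convert_lora(sd)))
# ['diffusion_model.single_blocks.37.linear1_qkv.lora_up.weight',
#  'diffusion_model.double_blocks.18.txt_attn.qkv.lora_up.weight']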

View File

@@ -52,6 +52,9 @@ except (ModuleNotFoundError, TypeError):
 
 cast_to = comfy.model_management.cast_to #TODO: remove once no more references
 
+if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
+    torch.backends.cudnn.benchmark = True
+
 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
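torch.backends.cudnn.benchmark makes cuDNN time several convolution algorithms on the first call for each input shape and cache the fastest, which pays off when shapes stay stable across calls. A standalone sketch with the same guards as the diff (minus the args.fast check, which needs ComfyUI's CLI state):

import torch

if torch.cuda.is_available() and torch.backends.cudnn.is_available():
    torch.backends.cudnn.benchmark = True
    conv = torch.nn.Conv2d(16, 16, 3, padding=1).cuda()
    x = torch.randn(1, 16, 128, 128, device="cuda")
    y = conv(x)  # first call per input shape pays the autotuning cost
    y = conv(x)  # later calls with the same shape reuse the cached algorithm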

View File

@@ -105,7 +105,7 @@ class FluxKontextMultiReferenceLatentMethod:
     def INPUT_TYPES(s):
         return {"required": {
             "conditioning": ("CONDITIONING", ),
-            "reference_latents_method": (("offset", "index"), ),
+            "reference_latents_method": (("offset", "index", "uso"), ),
             }}
 
     RETURN_TYPES = ("CONDITIONING",)
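The tuple-of-strings input form declares a combo widget, so "uso" now appears as a third choice that ultimately selects the new elif branch in the Flux forward pass above. A minimal illustration of the declaration format (not the full node):

options = ("offset", "index", "uso")
input_spec = {"required": {"conditioning": ("CONDITIONING",),
                           "reference_latents_method": (options,)}}
assert "uso" in input_spec["required"]["reference_latents_method"][0]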