Merge ebe2e774e7 into f5030e26fd

2026-02-07 20:12:35 +08:00 · 2026-02-03 09:56:20 +00:00
2 changed files with 6 additions and 20 deletions
--- a/comfy/text_encoders/ace15.py
+++ b/comfy/text_encoders/ace15.py
@@ -57,9 +57,8 @@ def sample_manual_loop_no_classes(
        if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
            eos_score = cfg_logits[:, eos_token_id].clone()

-        remove_logit_value = torch.finfo(cfg_logits.dtype).min
        # Only generate audio tokens
-        cfg_logits[:, :audio_start_id] = remove_logit_value
+        cfg_logits[:, :audio_start_id] = float('-inf')

        if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
            cfg_logits[:, eos_token_id] = eos_score
@@ -67,7 +66,7 @@ def sample_manual_loop_no_classes(
        if top_k is not None and top_k > 0:
            top_k_vals, _ = torch.topk(cfg_logits, top_k)
            min_val = top_k_vals[..., -1, None]
-            cfg_logits[cfg_logits < min_val] = remove_logit_value
+            cfg_logits[cfg_logits < min_val] = float('-inf')

        if top_p is not None and top_p < 1.0:
            sorted_logits, sorted_indices = torch.sort(cfg_logits, descending=True)
@@ -76,7 +75,7 @@ def sample_manual_loop_no_classes(
            sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
            sorted_indices_to_remove[..., 0] = 0
            indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
-            cfg_logits[indices_to_remove] = remove_logit_value
+            cfg_logits[indices_to_remove] = float('-inf')

        if temperature > 0:
            cfg_logits = cfg_logits / temperature
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -6,7 +6,6 @@ import math

 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
-import comfy.ops
 import comfy.ldm.common_dit
 import comfy.clip_model

@@ -628,10 +627,10 @@ class Llama2_(nn.Module):
        mask = None
        if attention_mask is not None:
            mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1])
-            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min)
+            mask = mask.masked_fill(mask.to(torch.bool), float("-inf"))

        if seq_len > 1:
-            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min).triu_(1)
+            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1)
            if mask is not None:
                mask += causal_mask
            else:
@@ -795,19 +794,7 @@ class Qwen3_2B_ACE15_lm(BaseLlama, torch.nn.Module):
        self.dtype = dtype

    def logits(self, x):
-        input = x[:, -1:]
-        module = self.model.embed_tokens
-
-        offload_stream = None
-        if module.comfy_cast_weights:
-            weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True)
-        else:
-            weight = self.model.embed_tokens.weight.to(x)
-
-        x = torch.nn.functional.linear(input, weight, None)
-
-        comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
-        return x
+        return torch.nn.functional.linear(x[:, -1:], self.model.embed_tokens.weight.to(x), None)

 class Qwen3_4B(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):