mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-03-16 22:58:19 +08:00
fix ACE-Step 1.5: max_tokens typo and lyrics embedding truncation
Two bugs in `encode_token_weights()` in ace15.py:

1. The `max_tokens` parameter received the `min_tokens` value: `max_tokens=lm_metadata["min_tokens"]` → `max_tokens=lm_metadata["max_tokens"]`. This caused the LM to always generate minimum-length audio codes regardless of the requested duration.
2. `lyrics_embeds[:, 0]` discarded the full lyric sequence, passing only the first token embedding to the diffusion model's lyric encoder. Changed to pass the full `lyrics_embeds` tensor for proper lyrics conditioning.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6d11cc7354
commit
ddd0cd7521
@@ -298,11 +298,11 @@ class ACE15TEModel(torch.nn.Module):
|
||||
self.qwen3_06b.set_clip_options({"layer": [0]})
|
||||
lyrics_embeds, _, extra_l = self.qwen3_06b.encode_token_weights(token_weight_pairs_lyrics)
|
||||
|
||||
out = {"conditioning_lyrics": lyrics_embeds[:, 0]}
|
||||
out = {"conditioning_lyrics": lyrics_embeds}
|
||||
|
||||
lm_metadata = token_weight_pairs["lm_metadata"]
|
||||
if lm_metadata["generate_audio_codes"]:
|
||||
audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["min_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"], min_p=lm_metadata["min_p"])
|
||||
audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["max_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"], min_p=lm_metadata["min_p"])
|
||||
out["audio_codes"] = [audio_codes]
|
||||
|
||||
return base_out, None, out
|
||||
|
||||
Loading…
Reference in New Issue
Block a user