diff --git a/comfy_api_nodes/apis/elevenlabs.py b/comfy_api_nodes/apis/elevenlabs.py index 3511f6c04..814e2fe3c 100644 --- a/comfy_api_nodes/apis/elevenlabs.py +++ b/comfy_api_nodes/apis/elevenlabs.py @@ -1,36 +1,6 @@ from pydantic import BaseModel, Field -class MusicSection(BaseModel): - section_name: str = Field(...) - positive_local_styles: list[str] = Field(...) - negative_local_styles: list[str] = Field(...) - duration_ms: int = Field(...) - lines: list[str] = Field(...) - - -class MusicPrompt(BaseModel): - positive_global_styles: list[str] = Field(...) - negative_global_styles: list[str] = Field(...) - sections: list[MusicSection] = Field(...) - - -class ComposeMusicRequest(BaseModel): - model_id: str = Field(...) - prompt: str | None = Field(...) - music_length_ms: int | None = Field(...) - force_instrumental: bool | None = Field(...) - composition_plan: MusicPrompt | None = Field(...) - respect_sections_durations: bool | None = Field(...) - output_format: str | None = Field(...) - - -class CreateCompositionPlanRequest(BaseModel): - prompt: str = Field(...) - music_length_ms: int | None = Field(...) - model_id: str = Field(...) - - class SpeechToTextRequest(BaseModel): model_id: str = Field(...) cloud_storage_url: str = Field(...) @@ -77,13 +47,6 @@ class TextToSpeechRequest(BaseModel): apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off") -class TextToSoundEffectsRequest(BaseModel): - text: str = Field(..., description="Text prompt to convert into a sound effect") - duration_seconds: float = Field(..., description="Duration of generated sound in seconds") - prompt_influence: float = Field(..., description="How closely generation follows the prompt") - loop: bool | None = Field(None, description="Whether to create a smoothly looping sound effect") - - class AddVoiceRequest(BaseModel): name: str = Field(..., description="Name that identifies the voice") remove_background_noise: bool = Field(..., description="Remove background noise from voice samples") diff --git a/comfy_api_nodes/nodes_elevenlabs.py b/comfy_api_nodes/nodes_elevenlabs.py index e19b19f97..caa0d1d0f 100644 --- a/comfy_api_nodes/nodes_elevenlabs.py +++ b/comfy_api_nodes/nodes_elevenlabs.py @@ -7,17 +7,12 @@ from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.elevenlabs import ( AddVoiceRequest, AddVoiceResponse, - ComposeMusicRequest, - CreateCompositionPlanRequest, DialogueInput, DialogueSettings, - MusicPrompt, - MusicSection, SpeechToSpeechRequest, SpeechToTextRequest, SpeechToTextResponse, TextToDialogueRequest, - TextToSoundEffectsRequest, TextToSpeechRequest, TextToSpeechVoiceSettings, ) @@ -67,363 +62,6 @@ ELEVENLABS_VOICE_MAP = { } -def parse_multiline_to_list(text: str) -> list[str]: - if not text or not text.strip(): - return [] - return [line.strip() for line in text.splitlines() if line.strip()] - - -class ElevenLabsComposeMusicSection(IO.ComfyNode): - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="ElevenLabsComposeMusicSection", - display_name="ElevenLabs Compose Music Section", - category="api node/audio/ElevenLabs", - description="Define a section for structured music composition.", - inputs=[ - IO.String.Input( - "section_name", - default="Verse", - tooltip="Name of this section (1-100 characters). " - "E.g., 'Intro', 'Verse', 'Chorus', 'Bridge', 'Outro'.", - ), - IO.String.Input( - "positive_local_styles", - default="", - multiline=True, - tooltip="Styles for this section (one per line). E.g., 'energetic', 'upbeat', 'guitar-driven'.", - ), - IO.String.Input( - "negative_local_styles", - default="", - multiline=True, - tooltip="Styles to avoid in this section (one per line). E.g., 'slow', 'acoustic'.", - ), - IO.Float.Input( - "duration", - default=30, - min=3, - max=120, - step=0.01, - display_mode=IO.NumberDisplay.number, - tooltip="Duration of this section in seconds.", - ), - IO.String.Input( - "content", - default="", - multiline=True, - tooltip="Lyrics for this section (one line per lyric line, max 200 characters per line).", - ), - ], - outputs=[ - IO.Custom(ELEVENLABS_MUSIC_SECTIONS).Output(display_name="section"), - ], - is_api_node=False, - ) - - @classmethod - def execute( - cls, - section_name: str, - positive_local_styles: str, - negative_local_styles: str, - duration: float, - content: str, - ) -> IO.NodeOutput: - validate_string(section_name, min_length=1, max_length=100) - lines = parse_multiline_to_list(content) - for i, line in enumerate(lines, 1): - if len(line) > 200: - raise ValueError(f"Line {i} exceeds 200 characters (has {len(line)}).") - section = { - "section_name": section_name, - "positive_local_styles": parse_multiline_to_list(positive_local_styles), - "negative_local_styles": parse_multiline_to_list(negative_local_styles), - "duration_ms": int(duration * 1000), - "lines": lines, - } - return IO.NodeOutput(json.dumps(section)) - - -class ElevenLabsCreateCompositionPlan(IO.ComfyNode): - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="ElevenLabsCreateCompositionPlan", - display_name="ElevenLabs Create Composition Plan", - category="api node/audio/ElevenLabs", - description="Generate a composition plan from lyrics. " - "Connect output to a 'Preview as Text' node to view the plan, then copy values to Section nodes.", - inputs=[ - IO.String.Input( - "prompt", - default="", - multiline=True, - tooltip="Lyrics or description to generate a composition plan from.", - ), - IO.Float.Input( - "duration", - default=60, - min=3, - max=600, - step=0.1, - display_mode=IO.NumberDisplay.number, - ), - IO.DynamicCombo.Input( - "model", - options=[ - IO.DynamicCombo.Option("music_v1", []), - ], - tooltip="Model to use for plan generation.", - ), - ], - outputs=[ - IO.String.Output(display_name="composition_plan"), - IO.Custom(ELEVENLABS_COMPOSITION_PLAN).Output(display_name="plan_data"), - ], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - ) - - @classmethod - async def execute( - cls, - prompt: str, - duration: float, - model: dict, - ) -> IO.NodeOutput: - validate_string(prompt, min_length=1) - request = CreateCompositionPlanRequest( - prompt=prompt, - music_length_ms=int(duration * 1000) if duration else None, - model_id=model["model"], - ) - response = await sync_op( - cls, - ApiEndpoint(path="/proxy/elevenlabs/v1/music/plan", method="POST"), - response_model=MusicPrompt, - data=request, - ) - output_lines = [ - "=== COMPOSITION PLAN ===", - "", - "--- GLOBAL STYLES ---", - "Positive (copy to positive_global_styles):", - "\n".join(response.positive_global_styles) if response.positive_global_styles else "(none)", - "", - "Negative (copy to negative_global_styles):", - "\n".join(response.negative_global_styles) if response.negative_global_styles else "(none)", - "", - "--- SECTIONS ---", - ] - for i, section in enumerate(response.sections, 1): - output_lines.extend( - [ - "", - f"=== Section {i}: {section.section_name} ===", - f"section_name: {section.section_name}", - f"duration: {section.duration_ms / 1000:.2f} seconds", - "", - "positive_local_styles:", - "\n".join(section.positive_local_styles) if section.positive_local_styles else "(none)", - "", - "negative_local_styles:", - "\n".join(section.negative_local_styles) if section.negative_local_styles else "(none)", - "", - "content (lyrics):", - "\n".join(section.lines) if section.lines else "(instrumental)", - ] - ) - return IO.NodeOutput("\n".join(output_lines), response.model_dump_json()) - - -class ElevenLabsComposeMusic(IO.ComfyNode): - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="ElevenLabsComposeMusic", - display_name="ElevenLabs Compose Music", - category="api node/audio/ElevenLabs", - description="Generate music. Use a simple text prompt or a detailed composition plan with sections.", - inputs=[ - IO.DynamicCombo.Input( - "model", - options=[ - IO.DynamicCombo.Option( - "music_v1", - [], - ), - ], - tooltip="Model to use for music generation.", - ), - IO.DynamicCombo.Input( - "content", - options=[ - IO.DynamicCombo.Option( - "prompt", - [ - IO.String.Input( - "prompt", - default="", - multiline=True, - tooltip="A simple text prompt to generate a song from (max 4100 characters).", - ), - IO.Float.Input( - "duration", - default=60, - min=3, - max=600, - step=0.1, - display_mode=IO.NumberDisplay.number, - ), - IO.Boolean.Input( - "force_instrumental", - default=False, - tooltip="If true, guarantees the generated song will be instrumental.", - ), - ], - ), - IO.DynamicCombo.Option( - "composition_plan", - [ - IO.String.Input( - "positive_global_styles", - default="", - multiline=True, - tooltip="Global styles for the entire song (one per line). " - "E.g., 'pop', 'electronic', 'uplifting'.", - ), - IO.String.Input( - "negative_global_styles", - default="", - multiline=True, - tooltip="Styles to avoid in the entire song (one per line). " - "E.g., 'metal', 'aggressive'.", - ), - IO.Boolean.Input( - "respect_sections_durations", - default=True, - tooltip="When true, strictly enforces each section's duration. " - "When false, may adjust for better quality.", - ), - IO.Autogrow.Input( - "sections", - template=IO.Autogrow.TemplatePrefix( - IO.Custom(ELEVENLABS_MUSIC_SECTIONS).Input("sections"), - prefix="section", - min=1, - max=30, - ), - ), - ], - ), - IO.DynamicCombo.Option( - "from_plan", - [ - IO.Custom(ELEVENLABS_COMPOSITION_PLAN).Input( - "plan_data", - tooltip="Connect the plan_data output from ElevenLabsCreateCompositionPlan node.", - ), - IO.Boolean.Input( - "respect_sections_durations", - default=True, - tooltip="When true, strictly enforces each section's duration. " - "When false, may adjust for better quality.", - ), - ], - ), - ], - tooltip="Choose between a simple text prompt, a structured composition plan, " - "or connect directly from ElevenLabsCreateCompositionPlan.", - ), - IO.Combo.Input( - "output_format", - options=["mp3_44100_192", "opus_48000_192"], - ), - ], - outputs=[ - IO.Audio.Output(), - ], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - ) - - @classmethod - async def execute( - cls, - model: dict, - content: dict, - output_format: str, - ) -> IO.NodeOutput: - if content["content"] == "prompt": - validate_string(content["prompt"], min_length=1, max_length=4100) - request = ComposeMusicRequest( - model_id=model["model"], - prompt=content["prompt"], - music_length_ms=content["duration"] * 1000, - force_instrumental=content["force_instrumental"], - output_format=output_format, - respect_sections_durations=None, - composition_plan=None, - ) - elif content["content"] == "from_plan": - composition_plan = MusicPrompt.model_validate_json(content["plan_data"]) - request = ComposeMusicRequest( - model_id=model["model"], - composition_plan=composition_plan, - respect_sections_durations=content["respect_sections_durations"], - output_format=output_format, - prompt=None, - music_length_ms=None, - force_instrumental=None, - ) - else: # composition_plan - sections_autogrow = content["sections"] - sections: list[MusicSection] = [] - for key in sections_autogrow: - section_json = sections_autogrow[key] - s = json.loads(section_json) - sections.append( - MusicSection( - section_name=s["section_name"], - positive_local_styles=s["positive_local_styles"], - negative_local_styles=s["negative_local_styles"], - duration_ms=s["duration_ms"], - lines=s["lines"], - ) - ) - if not sections: - raise ValueError("At least one section is required for composition_plan.") - request = ComposeMusicRequest( - model_id=model["model"], - composition_plan=MusicPrompt( - positive_global_styles=parse_multiline_to_list(content["positive_global_styles"]), - negative_global_styles=parse_multiline_to_list(content["negative_global_styles"]), - sections=sections, - ), - respect_sections_durations=content["respect_sections_durations"], - output_format=output_format, - prompt=None, - music_length_ms=None, - force_instrumental=None, - ) - response = await sync_op_raw( - cls, - ApiEndpoint(path="/proxy/elevenlabs/v1/music", method="POST"), - data=request, - as_binary=True, - ) - return IO.NodeOutput(audio_bytes_to_audio_input(response)) - - class ElevenLabsSpeechToText(IO.ComfyNode): @classmethod def define_schema(cls) -> IO.Schema: @@ -805,99 +443,6 @@ class ElevenLabsAudioIsolation(IO.ComfyNode): return IO.NodeOutput(audio_bytes_to_audio_input(response)) -class ElevenLabsTextToSoundEffects(IO.ComfyNode): - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="ElevenLabsTextToSoundEffects", - display_name="ElevenLabs Text to Sound Effects", - category="api node/audio/ElevenLabs", - description="Generate sound effects from text descriptions.", - inputs=[ - IO.String.Input( - "text", - multiline=True, - default="", - tooltip="Text description of the sound effect to generate.", - ), - IO.DynamicCombo.Input( - "model", - options=[ - IO.DynamicCombo.Option( - "eleven_sfx_v2", - [ - IO.Float.Input( - "duration", - default=5.0, - min=0.5, - max=30.0, - step=0.1, - display_mode=IO.NumberDisplay.slider, - tooltip="Duration of generated sound in seconds.", - ), - IO.Boolean.Input( - "loop", - default=False, - tooltip="Create a smoothly looping sound effect.", - ), - IO.Float.Input( - "prompt_influence", - default=0.3, - min=0.0, - max=1.0, - step=0.01, - display_mode=IO.NumberDisplay.slider, - tooltip="How closely generation follows the prompt. " - "Higher values make the sound follow the text more closely.", - ), - ], - ), - ], - tooltip="Model to use for sound effect generation.", - ), - IO.Combo.Input( - "output_format", - options=["mp3_44100_192", "opus_48000_192"], - tooltip="Audio output format.", - ), - ], - outputs=[ - IO.Audio.Output(), - ], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - ) - - @classmethod - async def execute( - cls, - text: str, - model: dict, - output_format: str, - ) -> IO.NodeOutput: - validate_string(text, min_length=1) - response = await sync_op_raw( - cls, - ApiEndpoint( - path="/proxy/elevenlabs/v1/sound-generation", - method="POST", - query_params={"output_format": output_format}, - ), - data=TextToSoundEffectsRequest( - text=text, - duration_seconds=model["duration"], - prompt_influence=model["prompt_influence"], - loop=model.get("loop", None), - ), - as_binary=True, - ) - return IO.NodeOutput(audio_bytes_to_audio_input(response)) - - class ElevenLabsInstantVoiceClone(IO.ComfyNode): @classmethod def define_schema(cls) -> IO.Schema: @@ -1251,14 +796,10 @@ class ElevenLabsExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ - # ElevenLabsComposeMusicSection, - # ElevenLabsCreateCompositionPlan, - # ElevenLabsComposeMusic, ElevenLabsSpeechToText, ElevenLabsVoiceSelector, ElevenLabsTextToSpeech, ElevenLabsAudioIsolation, - ElevenLabsTextToSoundEffects, ElevenLabsInstantVoiceClone, ElevenLabsSpeechToSpeech, ElevenLabsTextToDialogue,