mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-02-06 19:42:34 +08:00
feat(api-nodes): add ElevenLabs API nodes
This commit is contained in:
parent
6ea8c128a3
commit
6f7b65c951
118
comfy_api_nodes/apis/elevenlabs.py
Normal file
118
comfy_api_nodes/apis/elevenlabs.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
# One section of a music composition plan: a name, style hints local to the
# section, a duration in milliseconds, and a list of text lines
# (presumably lyrics -- confirm against the ElevenLabs API docs).
# Every field is required; none carries a default.
class MusicSection(BaseModel):
    section_name: str
    positive_local_styles: list[str]
    negative_local_styles: list[str]
    duration_ms: int
    lines: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
# Composition plan: style hints that apply globally plus the list of
# MusicSection entries. All three fields are required.
class MusicPrompt(BaseModel):
    positive_global_styles: list[str]
    negative_global_styles: list[str]
    sections: list[MusicSection]
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for composing music.
# No field has a default: the nullable fields accept None, but the caller
# must still pass each of them explicitly.
class ComposeMusicRequest(BaseModel):
    model_id: str
    prompt: str | None = Field(default=...)
    music_length_ms: int | None = Field(default=...)
    force_instrumental: bool | None = Field(default=...)
    composition_plan: MusicPrompt | None = Field(default=...)
    respect_sections_durations: bool | None = Field(default=...)
    output_format: str | None = Field(default=...)
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for creating a composition plan from a text prompt.
# All fields must be supplied; music_length_ms may be None but has no default.
class CreateCompositionPlanRequest(BaseModel):
    prompt: str
    music_length_ms: int | None = Field(default=...)
    model_id: str
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for speech-to-text transcription.
# model_id, cloud_storage_url and seed are required; timestamps_granularity
# defaults to "word"; every other field defaults to None.
class SpeechToTextRequest(BaseModel):
    model_id: str
    cloud_storage_url: str
    language_code: str | None = Field(
        default=None,
        description="ISO-639-1 or ISO-639-3 language code",
    )
    tag_audio_events: bool | None = Field(
        default=None,
        description="Annotate sounds like (laughter) in transcript",
    )
    num_speakers: int | None = Field(
        default=None,
        description="Max speakers predicted",
    )
    timestamps_granularity: str = Field(
        "word",
        description="Timing precision: none, word, or character",
    )
    diarize: bool | None = Field(
        default=None,
        description="Annotate which speaker is talking",
    )
    diarization_threshold: float | None = Field(
        default=None,
        description="Speaker separation sensitivity",
    )
    temperature: float | None = Field(
        default=None,
        description="Randomness control",
    )
    seed: int = Field(
        default=...,
        description="Seed for deterministic sampling",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# A single element of a transcript.
# Only text is required; type defaults to "word" and the timing, speaker and
# log-probability fields default to None.
class SpeechToTextWord(BaseModel):
    text: str = Field(
        default=...,
        description="The word text",
    )
    type: str = Field(
        "word",
        description="Type of text element (word, spacing, etc.)",
    )
    start: float | None = Field(
        default=None,
        description="Start time in seconds (when timestamps enabled)",
    )
    end: float | None = Field(
        default=None,
        description="End time in seconds (when timestamps enabled)",
    )
    speaker_id: str | None = Field(
        default=None,
        description="Speaker identifier when diarization is enabled",
    )
    logprob: float | None = Field(
        default=None,
        description="Log probability of the word",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Response model for speech-to-text transcription.
# language_code and text are required; language_probability and words
# default to None.
class SpeechToTextResponse(BaseModel):
    language_code: str = Field(
        default=...,
        description="Detected or specified language code",
    )
    language_probability: float | None = Field(
        default=None,
        description="Confidence of language detection",
    )
    text: str = Field(
        default=...,
        description="Full transcript text",
    )
    words: list[SpeechToTextWord] | None = Field(
        default=None,
        description="Word-level timing information",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Voice tuning options for text-to-speech.
# Every field is optional and defaults to None.
class TextToSpeechVoiceSettings(BaseModel):
    stability: float | None = Field(
        default=None,
        description="Voice stability",
    )
    similarity_boost: float | None = Field(
        default=None,
        description="Similarity boost",
    )
    style: float | None = Field(
        default=None,
        description="Style exaggeration",
    )
    use_speaker_boost: bool | None = Field(
        default=None,
        description="Boost similarity to original speaker",
    )
    speed: float | None = Field(
        default=None,
        description="Speech speed",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for text-to-speech.
# text, model_id and seed are required; the remaining fields default to None.
class TextToSpeechRequest(BaseModel):
    text: str = Field(
        default=...,
        description="Text to convert to speech",
    )
    model_id: str = Field(
        default=...,
        description="Model ID for TTS",
    )
    language_code: str | None = Field(
        default=None,
        description="ISO-639-1 or ISO-639-3 language code",
    )
    voice_settings: TextToSpeechVoiceSettings | None = Field(
        default=None,
        description="Voice settings",
    )
    seed: int = Field(
        default=...,
        description="Seed for deterministic sampling",
    )
    apply_text_normalization: str | None = Field(
        default=None,
        description="Text normalization mode: auto, on, off",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for generating a sound effect from text.
# text, duration_seconds and prompt_influence are required; loop defaults
# to None.
class TextToSoundEffectsRequest(BaseModel):
    text: str = Field(
        default=...,
        description="Text prompt to convert into a sound effect",
    )
    duration_seconds: float = Field(
        default=...,
        description="Duration of generated sound in seconds",
    )
    prompt_influence: float = Field(
        default=...,
        description="How closely generation follows the prompt",
    )
    loop: bool | None = Field(
        default=None,
        description="Whether to create a smoothly looping sound effect",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for adding a voice. Both fields are required.
class AddVoiceRequest(BaseModel):
    name: str = Field(
        default=...,
        description="Name that identifies the voice",
    )
    remove_background_noise: bool = Field(
        default=...,
        description="Remove background noise from voice samples",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Response model for adding a voice; carries only the required voice_id.
class AddVoiceResponse(BaseModel):
    voice_id: str = Field(
        default=...,
        description="The newly created voice's unique identifier",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for speech-to-speech conversion. All four fields are required.
# voice_settings is typed as a plain string here (a JSON string per its
# description), not as a nested settings model.
class SpeechToSpeechRequest(BaseModel):
    model_id: str = Field(
        default=...,
        description="Model ID for speech-to-speech",
    )
    voice_settings: str = Field(
        default=...,
        description="JSON string of voice settings",
    )
    seed: int = Field(
        default=...,
        description="Seed for deterministic sampling",
    )
    remove_background_noise: bool = Field(
        default=...,
        description="Remove background noise from input audio",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# One dialogue segment: the text to speak and the voice to speak it with.
# Both fields are required.
class DialogueInput(BaseModel):
    text: str = Field(
        default=...,
        description="Text content to convert to speech",
    )
    voice_id: str = Field(
        default=...,
        description="Voice identifier for this dialogue segment",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Settings for dialogue generation; the single stability field defaults to None.
class DialogueSettings(BaseModel):
    stability: float | None = Field(
        default=None,
        description="Voice stability (0-1)",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for text-to-dialogue generation.
# inputs and model_id are required; the remaining fields (including seed)
# default to None.
class TextToDialogueRequest(BaseModel):
    inputs: list[DialogueInput] = Field(
        default=...,
        description="List of dialogue segments",
    )
    model_id: str = Field(
        default=...,
        description="Model ID for dialogue generation",
    )
    language_code: str | None = Field(
        default=None,
        description="ISO-639-1 language code",
    )
    settings: DialogueSettings | None = Field(
        default=None,
        description="Voice settings",
    )
    seed: int | None = Field(
        default=None,
        description="Seed for deterministic sampling",
    )
    apply_text_normalization: str | None = Field(
        default=None,
        description="Text normalization mode: auto, on, off",
    )
|
||||||
1269
comfy_api_nodes/nodes_elevenlabs.py
Normal file
1269
comfy_api_nodes/nodes_elevenlabs.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -9,6 +9,8 @@ from .client import (
|
|||||||
from .conversions import (
|
from .conversions import (
|
||||||
audio_bytes_to_audio_input,
|
audio_bytes_to_audio_input,
|
||||||
audio_input_to_mp3,
|
audio_input_to_mp3,
|
||||||
|
audio_ndarray_to_bytesio,
|
||||||
|
audio_tensor_to_contiguous_ndarray,
|
||||||
audio_to_base64_string,
|
audio_to_base64_string,
|
||||||
bytesio_to_image_tensor,
|
bytesio_to_image_tensor,
|
||||||
convert_mask_to_image,
|
convert_mask_to_image,
|
||||||
@ -74,6 +76,8 @@ __all__ = [
|
|||||||
# Conversions
|
# Conversions
|
||||||
"audio_bytes_to_audio_input",
|
"audio_bytes_to_audio_input",
|
||||||
"audio_input_to_mp3",
|
"audio_input_to_mp3",
|
||||||
|
"audio_ndarray_to_bytesio",
|
||||||
|
"audio_tensor_to_contiguous_ndarray",
|
||||||
"audio_to_base64_string",
|
"audio_to_base64_string",
|
||||||
"bytesio_to_image_tensor",
|
"bytesio_to_image_tensor",
|
||||||
"convert_mask_to_image",
|
"convert_mask_to_image",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user