ComfyUI/comfy_api_nodes/apis/minimax.py
octo-patch 3c5f87b270 feat(api-nodes): add MiniMax Chat and TTS nodes
- Add MinimaxChatNode for text generation using MiniMax-M2.7 and
  MiniMax-M2.7-highspeed models via OpenAI-compatible API
- Add MinimaxTextToSpeechNode for speech synthesis using speech-2.8-hd
  and speech-2.8-turbo models via MiniMax TTS API
- Add Pydantic request/response models for Chat and TTS APIs to
  comfy_api_nodes/apis/minimax.py
- Add unit tests for all new API models
2026-04-12 18:52:48 +08:00

179 lines
6.9 KiB
Python

from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
class MinimaxBaseResponse(BaseModel):
    """Status block made of a numeric code and a message.

    Other response models in this module embed it as ``base_resp``.
    """

    # Required. Per the field description, 0 means success.
    status_code: int = Field(
        ...,
        description="Status code. 0 indicates success, other values indicate errors.",
    )
    # Required. Text that accompanies the status code.
    status_msg: str = Field(
        ...,
        description="Specific error details or success message.",
    )
class File(BaseModel):
    """Metadata describing a file; every field is optional and defaults to None."""

    # NOTE: this field name shadows the ``bytes`` builtin inside the class
    # body; presumably it mirrors the key used on the wire -- confirm before
    # renaming.
    bytes: Optional[int] = Field(default=None, description="File size in bytes")
    created_at: Optional[int] = Field(
        default=None,
        description="Unix timestamp when the file was created, in seconds",
    )
    download_url: Optional[str] = Field(
        default=None,
        description="The URL to download the video",
    )
    backup_download_url: Optional[str] = Field(
        default=None,
        description="The backup URL to download the video",
    )
    # Typed as an integer here, unlike the string ``file_id`` on
    # ``MinimaxTaskResultResponse``.
    file_id: Optional[int] = Field(
        default=None,
        description="Unique identifier for the file",
    )
    filename: Optional[str] = Field(default=None, description="The name of the file")
    purpose: Optional[str] = Field(
        default=None,
        description="The purpose of using the file",
    )
class MinimaxFileRetrieveResponse(BaseModel):
    """Response that pairs a status block with a file's metadata.

    Both members are required.
    """

    base_resp: MinimaxBaseResponse = Field(...)
    file: File = Field(...)
class MiniMaxModel(str, Enum):
    """Model identifiers accepted by ``MinimaxVideoGenerationRequest.model``.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    T2V_01_Director = "T2V-01-Director"
    I2V_01_Director = "I2V-01-Director"
    S2V_01 = "S2V-01"
    I2V_01 = "I2V-01"
    I2V_01_live = "I2V-01-live"
    T2V_01 = "T2V-01"
    Hailuo_02 = "MiniMax-Hailuo-02"
class Status6(str, Enum):
    """Task states used by ``MinimaxTaskResultResponse.status``.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    Queueing = "Queueing"
    Preparing = "Preparing"
    Processing = "Processing"
    Success = "Success"
    Fail = "Fail"
class MinimaxTaskResultResponse(BaseModel):
    """Result of querying a task: status block, task id, state and file id."""

    base_resp: MinimaxBaseResponse
    # Optional. Typed as a string here, unlike the integer ``File.file_id``.
    file_id: Optional[str] = Field(
        default=None,
        description=(
            "After the task status changes to Success, this field returns "
            "the file ID corresponding to the generated video."
        ),
    )
    status: Status6 = Field(
        ...,
        description=(
            "Task status: 'Queueing' (in queue), 'Preparing' (task is preparing), "
            "'Processing' (generating), 'Success' (task completed successfully), "
            "or 'Fail' (task failed)."
        ),
    )
    task_id: str = Field(..., description="The task ID being queried.")
class SubjectReferenceItem(BaseModel):
    """One subject reference: an image and a mask, both optional strings."""

    image: Optional[str] = Field(
        default=None,
        description="URL or base64 encoding of the subject reference image.",
    )
    mask: Optional[str] = Field(
        default=None,
        description="URL or base64 encoding of the mask for the subject reference image.",
    )
class MinimaxVideoGenerationRequest(BaseModel):
    """Request body for creating a video generation task.

    Only ``model`` is required. Every other field is optional and defaults to
    None, except ``prompt_optimizer``, which defaults to True.
    """

    callback_url: Optional[str] = Field(
        default=None,
        description=(
            "Optional. URL to receive real-time status updates about the "
            "video generation task."
        ),
    )
    first_frame_image: Optional[str] = Field(
        default=None,
        description=(
            "URL or base64 encoding of the first frame image. Required when "
            "model is I2V-01, I2V-01-Director, or I2V-01-live."
        ),
    )
    # The option list below is kept in sync with the MiniMaxModel enum; it
    # previously omitted MiniMax-Hailuo-02 even though the enum accepts it.
    model: MiniMaxModel = Field(
        ...,
        description=(
            "Required. ID of model. Options: T2V-01-Director, I2V-01-Director, "
            "S2V-01, I2V-01, I2V-01-live, T2V-01, MiniMax-Hailuo-02"
        ),
    )
    # NOTE: the description says "less than 2000" while max_length=2000 also
    # admits exactly 2000 characters; the validation limit is left unchanged.
    prompt: Optional[str] = Field(
        default=None,
        description=(
            "Description of the video. Should be less than 2000 characters. "
            "Supports camera movement instructions in [brackets]."
        ),
        max_length=2000,
    )
    prompt_optimizer: Optional[bool] = Field(
        default=True,
        description=(
            "If true (default), the model will automatically optimize the "
            "prompt. Set to false for more precise control."
        ),
    )
    subject_reference: Optional[list[SubjectReferenceItem]] = Field(
        default=None,
        description=(
            "Only available when model is S2V-01. The model will generate a "
            "video based on the subject uploaded through this parameter."
        ),
    )
    duration: Optional[int] = Field(
        default=None,
        description="The length of the output video in seconds.",
    )
    resolution: Optional[str] = Field(
        default=None,
        description=(
            "The dimensions of the video display. 1080p corresponds to "
            "1920 x 1080 pixels, 768p corresponds to 1366 x 768 pixels."
        ),
    )
class MinimaxVideoGenerationResponse(BaseModel):
    """Response to a video generation request: status block plus task id."""

    base_resp: MinimaxBaseResponse
    task_id: str = Field(
        ...,
        description="The task ID for the asynchronous video generation task.",
    )
# --- Chat API models ---
class MinimaxChatMessage(BaseModel):
    """A single chat message; both the role and the content are required."""

    # Plain ``str``: the roles named in the description are not enforced here.
    role: str = Field(
        ...,
        description="The role of the message author: 'user' or 'assistant'.",
    )
    content: str = Field(
        ...,
        description="The content of the message.",
    )
class MinimaxChatRequest(BaseModel):
    """Chat completion request: a model id, the messages and sampling options."""

    model: str = Field(
        ...,
        description="The model ID to use for chat completion.",
    )
    messages: list[MinimaxChatMessage] = Field(
        ...,
        description="A list of messages comprising the conversation.",
    )
    # The range in the description is informational; no bounds are declared
    # on the field itself.
    temperature: Optional[float] = Field(
        default=1.0,
        description="Sampling temperature in (0.0, 1.0]. Default is 1.0.",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="Maximum number of tokens to generate.",
    )
    # Streaming is off unless the caller asks for it.
    stream: Optional[bool] = Field(
        default=False,
        description="Whether to stream partial results.",
    )
class MinimaxChatChoice(BaseModel):
    """One entry of a chat response's ``choices`` list."""

    index: int = Field(
        ...,
        description="Index of this choice.",
    )
    message: MinimaxChatMessage = Field(
        ...,
        description="The generated message.",
    )
    finish_reason: Optional[str] = Field(
        default=None,
        description="The reason generation stopped.",
    )
class MinimaxChatUsage(BaseModel):
    """Token counters for a chat completion; each one is optional (None by default)."""

    prompt_tokens: Optional[int] = None
    completion_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
class MinimaxChatResponse(BaseModel):
    """Chat completion response; only ``choices`` is required."""

    id: Optional[str] = Field(
        default=None,
        description="Unique identifier for this completion.",
    )
    choices: list[MinimaxChatChoice] = Field(
        ...,
        description="List of generated choices.",
    )
    usage: Optional[MinimaxChatUsage] = Field(
        default=None,
        description="Token usage information.",
    )
    model: Optional[str] = Field(
        default=None,
        description="The model used for this completion.",
    )
# --- TTS API models ---
class MinimaxTTSVoiceSetting(BaseModel):
    """Voice parameters for speech synthesis; only ``voice_id`` is required."""

    voice_id: str = Field(
        ...,
        description="The voice ID to use for speech synthesis.",
    )
    # The defaults below are the "normal" values named in each description.
    speed: Optional[float] = Field(
        default=1.0,
        description="Speech speed. 1.0 is normal.",
    )
    vol: Optional[float] = Field(
        default=1.0,
        description="Volume. 1.0 is normal.",
    )
    pitch: Optional[int] = Field(
        default=0,
        description="Pitch adjustment. 0 is normal.",
    )
class MinimaxTTSAudioSetting(BaseModel):
    """Audio output parameters; every field is optional and has a default."""

    sample_rate: Optional[int] = Field(
        default=32000,
        description="Audio sample rate in Hz.",
    )
    bitrate: Optional[int] = Field(
        default=128000,
        description="Audio bitrate in bps.",
    )
    # NOTE: this field name shadows the ``format`` builtin inside the class
    # body.
    format: Optional[str] = Field(
        default="mp3",
        description="Audio format: 'mp3' or 'pcm'.",
    )
    channel: Optional[int] = Field(
        default=1,
        description="Number of audio channels.",
    )
class MinimaxTTSRequest(BaseModel):
    """Text-to-speech request: model id, text, voice settings and options.

    ``model``, ``text`` and ``voice_setting`` are required.
    """

    model: str = Field(
        ...,
        description="The TTS model ID to use.",
    )
    text: str = Field(
        ...,
        description="The text to synthesize into speech.",
    )
    # Streaming is on by default for this request.
    stream: Optional[bool] = Field(
        default=True,
        description="Whether to stream the audio output.",
    )
    voice_setting: MinimaxTTSVoiceSetting = Field(
        ...,
        description="Voice settings.",
    )
    audio_setting: Optional[MinimaxTTSAudioSetting] = Field(
        default=None,
        description="Audio output settings.",
    )