feat(api-nodes): add MiniMax Chat and TTS nodes

- Add MinimaxChatNode for text generation using MiniMax-M2.7 and
  MiniMax-M2.7-highspeed models via OpenAI-compatible API
- Add MinimaxTextToSpeechNode for speech synthesis using speech-2.8-hd
  and speech-2.8-turbo models via MiniMax TTS API
- Add Pydantic request/response models for Chat and TTS APIs to
  comfy_api_nodes/apis/minimax.py
- Add unit tests for all new API models
This commit is contained in:
octo-patch 2026-04-12 18:52:48 +08:00
parent 31283d2892
commit 3c5f87b270
3 changed files with 407 additions and 0 deletions

View File

@ -118,3 +118,61 @@ class MinimaxVideoGenerationResponse(BaseModel):
task_id: str = Field(
..., description='The task ID for the asynchronous video generation task.'
)
# --- Chat API models ---
class MinimaxChatMessage(BaseModel):
    """A single message in a MiniMax chat conversation."""

    # Fix: 'system' was missing from the description — MinimaxChatNode sends a
    # role="system" message whenever a system_prompt is provided.
    role: str = Field(
        ...,
        description="The role of the message author: 'system', 'user' or 'assistant'.",
    )
    content: str = Field(..., description="The content of the message.")
class MinimaxChatRequest(BaseModel):
    """Request payload for the MiniMax chat-completions endpoint."""

    model: str = Field(..., description="The model ID to use for chat completion.")
    messages: list[MinimaxChatMessage] = Field(
        ..., description="A list of messages comprising the conversation."
    )
    temperature: Optional[float] = Field(
        1.0, description="Sampling temperature in (0.0, 1.0]. Default is 1.0."
    )
    max_tokens: Optional[int] = Field(
        None, description="Maximum number of tokens to generate."
    )
    stream: Optional[bool] = Field(
        False, description="Whether to stream partial results."
    )
class MinimaxChatChoice(BaseModel):
    """One generated completion choice in a chat response."""

    index: int = Field(..., description="Index of this choice.")
    message: MinimaxChatMessage = Field(..., description="The generated message.")
    finish_reason: Optional[str] = Field(
        None, description="The reason generation stopped."
    )
class MinimaxChatUsage(BaseModel):
    """Token accounting reported by the chat API.

    Consistency fix: every other model in this section documents its fields;
    these three previously had bare ``Field(None)`` with no description.
    """

    prompt_tokens: Optional[int] = Field(
        None, description="Number of tokens in the prompt."
    )
    completion_tokens: Optional[int] = Field(
        None, description="Number of tokens in the generated completion."
    )
    total_tokens: Optional[int] = Field(
        None, description="Total number of tokens used (prompt + completion)."
    )
class MinimaxChatResponse(BaseModel):
    """Response payload returned by the MiniMax chat-completions endpoint."""

    id: Optional[str] = Field(
        None, description="Unique identifier for this completion."
    )
    choices: list[MinimaxChatChoice] = Field(
        ..., description="List of generated choices."
    )
    usage: Optional[MinimaxChatUsage] = Field(
        None, description="Token usage information."
    )
    model: Optional[str] = Field(
        None, description="The model used for this completion."
    )
# --- TTS API models ---
class MinimaxTTSVoiceSetting(BaseModel):
    """Voice configuration for a TTS request."""

    voice_id: str = Field(
        ..., description="The voice ID to use for speech synthesis."
    )
    speed: Optional[float] = Field(1.0, description="Speech speed. 1.0 is normal.")
    vol: Optional[float] = Field(1.0, description="Volume. 1.0 is normal.")
    pitch: Optional[int] = Field(0, description="Pitch adjustment. 0 is normal.")
class MinimaxTTSAudioSetting(BaseModel):
    """Audio-output configuration for a TTS request."""

    sample_rate: Optional[int] = Field(32000, description="Audio sample rate in Hz.")
    bitrate: Optional[int] = Field(128000, description="Audio bitrate in bps.")
    # NOTE: field name shadows the builtin `format`, but it must match the
    # wire-format key expected by the MiniMax API.
    format: Optional[str] = Field("mp3", description="Audio format: 'mp3' or 'pcm'.")
    channel: Optional[int] = Field(1, description="Number of audio channels.")
class MinimaxTTSRequest(BaseModel):
    """Request payload for the MiniMax text-to-speech endpoint."""

    model: str = Field(..., description="The TTS model ID to use.")
    text: str = Field(..., description="The text to synthesize into speech.")
    stream: Optional[bool] = Field(
        True, description="Whether to stream the audio output."
    )
    voice_setting: MinimaxTTSVoiceSetting = Field(
        ..., description="Voice settings."
    )
    audio_setting: Optional[MinimaxTTSAudioSetting] = Field(
        None, description="Audio output settings."
    )

View File

@ -5,18 +5,26 @@ from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension
from comfy_api_nodes.apis.minimax import (
MinimaxChatMessage,
MinimaxChatRequest,
MinimaxChatResponse,
MinimaxFileRetrieveResponse,
MiniMaxModel,
MinimaxTaskResultResponse,
MinimaxTTSAudioSetting,
MinimaxTTSRequest,
MinimaxTTSVoiceSetting,
MinimaxVideoGenerationRequest,
MinimaxVideoGenerationResponse,
SubjectReferenceItem,
)
from comfy_api_nodes.util import (
ApiEndpoint,
audio_bytes_to_audio_input,
download_url_to_video_output,
poll_op,
sync_op,
sync_op_raw,
upload_images_to_comfyapi,
validate_string,
)
@ -437,6 +445,204 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
return IO.NodeOutput(await download_url_to_video_output(file_url))
# Option lists surfaced in the Combo inputs of the nodes below.
# Order matters: the first/declared default entry is what the UI preselects.
MINIMAX_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
# Voice IDs passed through as MinimaxTTSVoiceSetting.voice_id.
MINIMAX_TTS_VOICES = [
"English_Graceful_Lady",
"English_Insightful_Speaker",
"English_radiant_girl",
"English_Persuasive_Man",
"English_Lucky_Robot",
"English_expressive_narrator",
]
class MinimaxChatNode(IO.ComfyNode):
    """Generate text responses from a MiniMax chat model."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's id, inputs, outputs, hidden fields and pricing."""
        inputs = [
            IO.String.Input(
                "prompt",
                multiline=True,
                default="",
                tooltip="Text prompt to send to the MiniMax chat model.",
            ),
            IO.Combo.Input(
                "model",
                options=MINIMAX_CHAT_MODELS,
                default="MiniMax-M2.7",
                tooltip="The MiniMax chat model to use. MiniMax-M2.7-highspeed is faster with the same performance.",
            ),
            IO.String.Input(
                "system_prompt",
                multiline=True,
                default="",
                optional=True,
                tooltip="Optional system prompt to set the behavior of the assistant.",
                advanced=True,
            ),
            IO.Int.Input(
                "max_tokens",
                default=1024,
                min=1,
                max=8192,
                step=1,
                optional=True,
                tooltip="Maximum number of tokens to generate.",
                advanced=True,
            ),
        ]
        return IO.Schema(
            node_id="MinimaxChatNode",
            display_name="MiniMax Chat",
            category="api node/text/MiniMax",
            description="Generate text responses using MiniMax chat models (M2.7 series).",
            inputs=inputs,
            outputs=[IO.String.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Token-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: str = "MiniMax-M2.7",
        system_prompt: str = "",
        max_tokens: int = 1024,
    ) -> IO.NodeOutput:
        """Call the MiniMax chat-completions proxy and return the reply text."""
        validate_string(prompt, strip_whitespace=False)
        # Build the conversation: user message always present, system message
        # (if any) prepended so it frames the whole exchange.
        conversation = [MinimaxChatMessage(role="user", content=prompt)]
        if system_prompt.strip():
            conversation.insert(
                0, MinimaxChatMessage(role="system", content=system_prompt)
            )
        request = MinimaxChatRequest(
            model=model,
            messages=conversation,
            temperature=1.0,
            max_tokens=max_tokens,
            stream=False,
        )
        result = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/chat/completions", method="POST"),
            response_model=MinimaxChatResponse,
            data=request,
        )
        if result.choices:
            return IO.NodeOutput(result.choices[0].message.content)
        # Best-effort: surface an explanatory string rather than raising.
        return IO.NodeOutput("Empty response from MiniMax model.")
class MinimaxTextToSpeechNode(IO.ComfyNode):
    """Convert text to speech using the MiniMax TTS API."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's id, inputs, outputs, hidden fields and pricing."""
        inputs = [
            IO.String.Input(
                "text",
                multiline=True,
                default="",
                tooltip="The text to synthesize into speech.",
            ),
            IO.Combo.Input(
                "voice",
                options=MINIMAX_TTS_VOICES,
                default="English_Graceful_Lady",
                tooltip="The voice to use for speech synthesis.",
            ),
            IO.Combo.Input(
                "model",
                options=MINIMAX_TTS_MODELS,
                default="speech-2.8-hd",
                tooltip="TTS model to use. speech-2.8-hd is higher quality; speech-2.8-turbo is faster.",
            ),
            IO.Float.Input(
                "speed",
                default=1.0,
                min=0.5,
                max=2.0,
                step=0.1,
                display_mode=IO.NumberDisplay.slider,
                optional=True,
                tooltip="Speech speed. 1.0 is normal speed.",
                advanced=True,
            ),
        ]
        return IO.Schema(
            node_id="MinimaxTextToSpeechNode",
            display_name="MiniMax Text to Speech",
            category="api node/audio/MiniMax",
            description="Convert text to speech using MiniMax TTS models.",
            inputs=inputs,
            outputs=[IO.Audio.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Character-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        text: str,
        voice: str = "English_Graceful_Lady",
        model: str = "speech-2.8-hd",
        speed: float = 1.0,
    ) -> IO.NodeOutput:
        """Synthesize `text` with the chosen voice/model and return audio."""
        validate_string(text, min_length=1)
        voice_cfg = MinimaxTTSVoiceSetting(
            voice_id=voice,
            speed=speed,
            vol=1.0,
            pitch=0,
        )
        # Fixed output settings: 32 kHz mono mp3 at 128 kbps.
        audio_cfg = MinimaxTTSAudioSetting(
            sample_rate=32000,
            bitrate=128000,
            format="mp3",
            channel=1,
        )
        payload = MinimaxTTSRequest(
            model=model,
            text=text,
            stream=False,
            voice_setting=voice_cfg,
            audio_setting=audio_cfg,
        )
        # Raw binary response: the proxy returns the encoded audio bytes.
        raw_audio = await sync_op_raw(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/t2a_v2", method="POST"),
            data=payload,
            as_binary=True,
        )
        return IO.NodeOutput(audio_bytes_to_audio_input(raw_audio))
class MinimaxExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -445,6 +651,8 @@ class MinimaxExtension(ComfyExtension):
MinimaxImageToVideoNode,
# MinimaxSubjectToVideoNode,
MinimaxHailuoVideoNode,
MinimaxChatNode,
MinimaxTextToSpeechNode,
]

View File

@ -0,0 +1,141 @@
"""Unit tests for MiniMax API models and node configuration."""
import pytest
from unittest.mock import MagicMock
# Test MiniMax API Pydantic models (no GPU required)
class TestMinimaxChatApiModels:
    """Exercise the Chat API Pydantic models (no GPU required)."""

    def test_chat_message_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage

        message = MinimaxChatMessage(role="user", content="Hello")
        assert message.role == "user"
        assert message.content == "Hello"

    def test_chat_request_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage, MinimaxChatRequest

        request = MinimaxChatRequest(
            model="MiniMax-M2.7",
            messages=[MinimaxChatMessage(role="user", content="Hi")],
        )
        assert request.model == "MiniMax-M2.7"
        assert request.temperature == 1.0
        assert request.stream is False

    def test_chat_request_custom_temperature(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage, MinimaxChatRequest

        request = MinimaxChatRequest(
            model="MiniMax-M2.7-highspeed",
            messages=[MinimaxChatMessage(role="user", content="test")],
            temperature=0.7,
            max_tokens=512,
        )
        assert request.model == "MiniMax-M2.7-highspeed"
        assert request.temperature == 0.7
        assert request.max_tokens == 512

    def test_chat_response_parsing(self):
        from comfy_api_nodes.apis.minimax import (
            MinimaxChatChoice,
            MinimaxChatMessage,
            MinimaxChatResponse,
        )

        reply = MinimaxChatMessage(role="assistant", content="Hello, world!")
        response = MinimaxChatResponse(
            id="test-id",
            choices=[MinimaxChatChoice(index=0, message=reply, finish_reason="stop")],
        )
        assert len(response.choices) == 1
        assert response.choices[0].message.content == "Hello, world!"
        assert response.choices[0].message.role == "assistant"

    def test_chat_response_empty_choices(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatResponse

        response = MinimaxChatResponse(choices=[])
        assert response.choices == []
class TestMinimaxTTSApiModels:
    """Exercise the TTS API Pydantic models (no GPU required)."""

    def test_tts_voice_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSVoiceSetting

        voice = MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady")
        assert voice.voice_id == "English_Graceful_Lady"
        assert voice.speed == 1.0
        assert voice.vol == 1.0
        assert voice.pitch == 0

    def test_tts_audio_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSAudioSetting

        audio = MinimaxTTSAudioSetting()
        assert audio.sample_rate == 32000
        assert audio.bitrate == 128000
        assert audio.format == "mp3"
        assert audio.channel == 1

    def test_tts_request_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting

        request = MinimaxTTSRequest(
            model="speech-2.8-hd",
            text="Hello world",
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady"),
        )
        assert request.model == "speech-2.8-hd"
        assert request.text == "Hello world"
        assert request.stream is True  # default

    def test_tts_request_non_streaming(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting

        request = MinimaxTTSRequest(
            model="speech-2.8-turbo",
            text="Test",
            stream=False,
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_radiant_girl"),
        )
        assert request.stream is False
        assert request.model == "speech-2.8-turbo"
class TestMinimaxNodeConstants:
    """Sanity checks for the MiniMax node constants.

    NOTE(review): the original tests asserted membership in the very list
    defined two lines above — a tautology that could never fail. These
    versions still avoid importing the (GPU-dependent) node module, but at
    least validate internal consistency of the expected constants: no
    duplicates, non-empty string entries, and the expected counts.
    TODO: once the node module is importable in CI, compare these lists
    against MINIMAX_CHAT_MODELS / MINIMAX_TTS_MODELS / MINIMAX_TTS_VOICES.
    """

    # Expected values, kept in sync with the node module by hand.
    EXPECTED_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
    EXPECTED_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
    EXPECTED_TTS_VOICES = [
        "English_Graceful_Lady",
        "English_Insightful_Speaker",
        "English_radiant_girl",
        "English_Persuasive_Man",
        "English_Lucky_Robot",
        "English_expressive_narrator",
    ]

    def test_chat_models_are_correct(self):
        models = self.EXPECTED_CHAT_MODELS
        assert len(models) == 2
        assert len(set(models)) == len(models)  # no duplicates
        assert all(isinstance(m, str) and m for m in models)
        assert "MiniMax-M2.7" in models
        assert "MiniMax-M2.7-highspeed" in models

    def test_tts_models_are_correct(self):
        models = self.EXPECTED_TTS_MODELS
        assert len(models) == 2
        assert len(set(models)) == len(models)
        assert "speech-2.8-hd" in models
        assert "speech-2.8-turbo" in models

    def test_tts_voices_list(self):
        voices = self.EXPECTED_TTS_VOICES
        assert len(voices) == 6
        assert len(set(voices)) == len(voices)
        assert all(isinstance(v, str) and v for v in voices)
        # The default Combo selection must be present in the options.
        assert "English_Graceful_Lady" in voices
class TestMinimaxApiModels:
    """Regression checks: the pre-existing video API models are unchanged."""

    def test_video_generation_request(self):
        from comfy_api_nodes.apis.minimax import MiniMaxModel, MinimaxVideoGenerationRequest

        request = MinimaxVideoGenerationRequest(
            model=MiniMaxModel.T2V_01,
            prompt="A test video",
        )
        assert request.model == MiniMaxModel.T2V_01
        assert request.prompt == "A test video"

    def test_minimax_models_enum(self):
        from comfy_api_nodes.apis.minimax import MiniMaxModel

        assert MiniMaxModel.T2V_01.value == "T2V-01"
        assert MiniMaxModel.Hailuo_02.value == "MiniMax-Hailuo-02"