# === comfy_api_nodes/apis/minimax.py (additions, reconstructed from diff) ===

# --- Chat API models ---

class MinimaxChatMessage(BaseModel):
    """A single message in a MiniMax chat conversation."""

    # FIX: original description omitted 'system', but MinimaxChatNode sends a
    # role="system" message whenever a system prompt is supplied.
    role: str = Field(..., description="The role of the message author: 'system', 'user' or 'assistant'.")
    content: str = Field(..., description="The content of the message.")


class MinimaxChatRequest(BaseModel):
    """Request payload for the MiniMax chat-completions endpoint."""

    model: str = Field(..., description="The model ID to use for chat completion.")
    messages: list[MinimaxChatMessage] = Field(..., description="A list of messages comprising the conversation.")
    temperature: Optional[float] = Field(1.0, description="Sampling temperature in (0.0, 1.0]. Default is 1.0.")
    max_tokens: Optional[int] = Field(None, description="Maximum number of tokens to generate.")
    stream: Optional[bool] = Field(False, description="Whether to stream partial results.")


class MinimaxChatChoice(BaseModel):
    """One generated completion choice within a chat response."""

    index: int = Field(..., description="Index of this choice.")
    message: MinimaxChatMessage = Field(..., description="The generated message.")
    finish_reason: Optional[str] = Field(None, description="The reason generation stopped.")


class MinimaxChatUsage(BaseModel):
    """Token-usage accounting returned by the chat API."""

    prompt_tokens: Optional[int] = Field(None)
    completion_tokens: Optional[int] = Field(None)
    total_tokens: Optional[int] = Field(None)


class MinimaxChatResponse(BaseModel):
    """Response payload from the MiniMax chat-completions endpoint."""

    id: Optional[str] = Field(None, description="Unique identifier for this completion.")
    choices: list[MinimaxChatChoice] = Field(..., description="List of generated choices.")
    usage: Optional[MinimaxChatUsage] = Field(None, description="Token usage information.")
    model: Optional[str] = Field(None, description="The model used for this completion.")
# --- TTS API models ---

class MinimaxTTSVoiceSetting(BaseModel):
    """Voice configuration (speaker, speed, volume, pitch) for MiniMax TTS."""

    voice_id: str = Field(..., description="The voice ID to use for speech synthesis.")
    speed: Optional[float] = Field(1.0, description="Speech speed. 1.0 is normal.")
    vol: Optional[float] = Field(1.0, description="Volume. 1.0 is normal.")
    pitch: Optional[int] = Field(0, description="Pitch adjustment. 0 is normal.")


class MinimaxTTSAudioSetting(BaseModel):
    """Audio output configuration for MiniMax TTS."""

    sample_rate: Optional[int] = Field(32000, description="Audio sample rate in Hz.")
    bitrate: Optional[int] = Field(128000, description="Audio bitrate in bps.")
    format: Optional[str] = Field("mp3", description="Audio format: 'mp3' or 'pcm'.")
    channel: Optional[int] = Field(1, description="Number of audio channels.")


class MinimaxTTSRequest(BaseModel):
    """Request payload for the MiniMax text-to-speech (t2a_v2) endpoint."""

    model: str = Field(..., description="The TTS model ID to use.")
    text: str = Field(..., description="The text to synthesize into speech.")
    stream: Optional[bool] = Field(True, description="Whether to stream the audio output.")
    voice_setting: MinimaxTTSVoiceSetting = Field(..., description="Voice settings.")
    audio_setting: Optional[MinimaxTTSAudioSetting] = Field(None, description="Audio output settings.")
# === comfy_api_nodes/nodes_minimax.py (additions, reconstructed from diff) ===

from comfy_api_nodes.apis.minimax import (
    MinimaxChatMessage,
    MinimaxChatRequest,
    MinimaxChatResponse,
    MinimaxFileRetrieveResponse,
    MiniMaxModel,
    MinimaxTaskResultResponse,
    MinimaxTTSAudioSetting,
    MinimaxTTSRequest,
    MinimaxTTSVoiceSetting,
    MinimaxVideoGenerationRequest,
    MinimaxVideoGenerationResponse,
    SubjectReferenceItem,
)
from comfy_api_nodes.util import (
    ApiEndpoint,
    audio_bytes_to_audio_input,
    download_url_to_video_output,
    poll_op,
    sync_op,
    sync_op_raw,
    upload_images_to_comfyapi,
    validate_string,
)


# Model/voice option lists exposed in the node UI combos below.
MINIMAX_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]

MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]

MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]


class MinimaxChatNode(IO.ComfyNode):
    """
    Node to generate text responses from a MiniMax chat model.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxChatNode",
            display_name="MiniMax Chat",
            category="api node/text/MiniMax",
            description="Generate text responses using MiniMax chat models (M2.7 series).",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to send to the MiniMax chat model.",
                ),
                IO.Combo.Input(
                    "model",
                    options=MINIMAX_CHAT_MODELS,
                    default="MiniMax-M2.7",
                    tooltip="The MiniMax chat model to use. MiniMax-M2.7-highspeed is faster with the same performance.",
                ),
                IO.String.Input(
                    "system_prompt",
                    multiline=True,
                    default="",
                    optional=True,
                    tooltip="Optional system prompt to set the behavior of the assistant.",
                    advanced=True,
                ),
                IO.Int.Input(
                    "max_tokens",
                    default=1024,
                    min=1,
                    max=8192,
                    step=1,
                    optional=True,
                    tooltip="Maximum number of tokens to generate.",
                    advanced=True,
                ),
            ],
            outputs=[
                IO.String.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Token-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: str = "MiniMax-M2.7",
        system_prompt: str = "",
        max_tokens: int = 1024,
    ) -> IO.NodeOutput:
        """Send the prompt (plus optional system prompt) to the proxy chat endpoint and
        return the first choice's text content."""
        validate_string(prompt, strip_whitespace=False)

        messages: list[MinimaxChatMessage] = []
        # Only emit a system message when the user actually provided one.
        if system_prompt.strip():
            messages.append(MinimaxChatMessage(role="system", content=system_prompt))
        messages.append(MinimaxChatMessage(role="user", content=prompt))

        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/chat/completions", method="POST"),
            response_model=MinimaxChatResponse,
            data=MinimaxChatRequest(
                model=model,
                messages=messages,
                temperature=1.0,
                max_tokens=max_tokens,
                stream=False,
            ),
        )

        # Degrade gracefully instead of raising if the API returns no choices.
        if not response.choices:
            return IO.NodeOutput("Empty response from MiniMax model.")
        return IO.NodeOutput(response.choices[0].message.content)


class MinimaxTextToSpeechNode(IO.ComfyNode):
    """
    Node to convert text to speech using MiniMax TTS API.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxTextToSpeechNode",
            display_name="MiniMax Text to Speech",
            category="api node/audio/MiniMax",
            description="Convert text to speech using MiniMax TTS models.",
            inputs=[
                IO.String.Input(
                    "text",
                    multiline=True,
                    default="",
                    tooltip="The text to synthesize into speech.",
                ),
                IO.Combo.Input(
                    "voice",
                    options=MINIMAX_TTS_VOICES,
                    default="English_Graceful_Lady",
                    tooltip="The voice to use for speech synthesis.",
                ),
                IO.Combo.Input(
                    "model",
                    options=MINIMAX_TTS_MODELS,
                    default="speech-2.8-hd",
                    tooltip="TTS model to use. speech-2.8-hd is higher quality; speech-2.8-turbo is faster.",
                ),
                IO.Float.Input(
                    "speed",
                    default=1.0,
                    min=0.5,
                    max=2.0,
                    step=0.1,
                    display_mode=IO.NumberDisplay.slider,
                    optional=True,
                    tooltip="Speech speed. 1.0 is normal speed.",
                    advanced=True,
                ),
            ],
            outputs=[
                IO.Audio.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Character-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        text: str,
        voice: str = "English_Graceful_Lady",
        model: str = "speech-2.8-hd",
        speed: float = 1.0,
    ) -> IO.NodeOutput:
        """Synthesize `text` with the chosen voice/model and return the decoded audio."""
        validate_string(text, min_length=1)

        # stream=False overrides the model's default (True) because the node
        # needs the complete audio payload as one binary response.
        response_bytes = await sync_op_raw(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/t2a_v2", method="POST"),
            data=MinimaxTTSRequest(
                model=model,
                text=text,
                stream=False,
                voice_setting=MinimaxTTSVoiceSetting(
                    voice_id=voice,
                    speed=speed,
                    vol=1.0,
                    pitch=0,
                ),
                audio_setting=MinimaxTTSAudioSetting(
                    sample_rate=32000,
                    bitrate=128000,
                    format="mp3",
                    channel=1,
                ),
            ),
            as_binary=True,
        )

        return IO.NodeOutput(audio_bytes_to_audio_input(response_bytes))


class MinimaxExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        # NOTE(review): the `return [` opener and any earlier entries were elided
        # diff context; reconstructed from the visible hunk — confirm against the file.
        return [
            MinimaxImageToVideoNode,
            # MinimaxSubjectToVideoNode,
            MinimaxHailuoVideoNode,
            MinimaxChatNode,
            MinimaxTextToSpeechNode,
        ]


# === tests-unit/comfy_api_test/test_minimax_nodes.py (new file from diff) ===

"""Unit tests for MiniMax API models and node configuration."""
# FIX: removed unused `import pytest` and `from unittest.mock import MagicMock`
# — nothing in this module uses either name.


# Test MiniMax API Pydantic models (no GPU required)
class TestMinimaxChatApiModels:
    def test_chat_message_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage
        msg = MinimaxChatMessage(role="user", content="Hello")
        assert msg.role == "user"
        assert msg.content == "Hello"

    def test_chat_request_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatRequest, MinimaxChatMessage
        req = MinimaxChatRequest(
            model="MiniMax-M2.7",
            messages=[MinimaxChatMessage(role="user", content="Hi")],
        )
        assert req.model == "MiniMax-M2.7"
        assert req.temperature == 1.0
        assert req.stream is False

    def test_chat_request_custom_temperature(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatRequest, MinimaxChatMessage
        req = MinimaxChatRequest(
            model="MiniMax-M2.7-highspeed",
            messages=[MinimaxChatMessage(role="user", content="test")],
            temperature=0.7,
            max_tokens=512,
        )
        assert req.model == "MiniMax-M2.7-highspeed"
        assert req.temperature == 0.7
        assert req.max_tokens == 512

    def test_chat_response_parsing(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatResponse, MinimaxChatChoice, MinimaxChatMessage
        resp = MinimaxChatResponse(
            id="test-id",
            choices=[
                MinimaxChatChoice(
                    index=0,
                    message=MinimaxChatMessage(role="assistant", content="Hello, world!"),
                    finish_reason="stop",
                )
            ],
        )
        assert len(resp.choices) == 1
        assert resp.choices[0].message.content == "Hello, world!"
        assert resp.choices[0].message.role == "assistant"

    def test_chat_response_empty_choices(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatResponse
        resp = MinimaxChatResponse(choices=[])
        assert resp.choices == []


class TestMinimaxTTSApiModels:
    def test_tts_voice_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSVoiceSetting
        v = MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady")
        assert v.voice_id == "English_Graceful_Lady"
        assert v.speed == 1.0
        assert v.vol == 1.0
        assert v.pitch == 0

    def test_tts_audio_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSAudioSetting
        a = MinimaxTTSAudioSetting()
        assert a.sample_rate == 32000
        assert a.bitrate == 128000
        assert a.format == "mp3"
        assert a.channel == 1

    def test_tts_request_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting
        req = MinimaxTTSRequest(
            model="speech-2.8-hd",
            text="Hello world",
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady"),
        )
        assert req.model == "speech-2.8-hd"
        assert req.text == "Hello world"
        assert req.stream is True  # default

    def test_tts_request_non_streaming(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting
        req = MinimaxTTSRequest(
            model="speech-2.8-turbo",
            text="Test",
            stream=False,
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_radiant_girl"),
        )
        assert req.stream is False
        assert req.model == "speech-2.8-turbo"


class TestMinimaxNodeConstants:
    """Test that node constants are correct per the SKILL.md spec."""

    # NOTE(review): these assert against local copies of the constants (kept in
    # sync by hand) to avoid importing GPU-dependent node modules.

    def test_chat_models_are_correct(self):
        # Verify expected chat models without importing GPU-dependent modules
        expected_chat_models = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
        assert "MiniMax-M2.7" in expected_chat_models
        assert "MiniMax-M2.7-highspeed" in expected_chat_models
        assert len(expected_chat_models) == 2

    def test_tts_models_are_correct(self):
        expected_tts_models = ["speech-2.8-hd", "speech-2.8-turbo"]
        assert "speech-2.8-hd" in expected_tts_models
        assert "speech-2.8-turbo" in expected_tts_models

    def test_tts_voices_list(self):
        expected_voices = [
            "English_Graceful_Lady",
            "English_Insightful_Speaker",
            "English_radiant_girl",
            "English_Persuasive_Man",
            "English_Lucky_Robot",
            "English_expressive_narrator",
        ]
        assert len(expected_voices) > 0
        assert "English_Graceful_Lady" in expected_voices


class TestMinimaxApiModels:
    """Test existing video API models remain unchanged."""

    def test_video_generation_request(self):
        from comfy_api_nodes.apis.minimax import MinimaxVideoGenerationRequest, MiniMaxModel
        req = MinimaxVideoGenerationRequest(
            model=MiniMaxModel.T2V_01,
            prompt="A test video",
        )
        assert req.model == MiniMaxModel.T2V_01
        assert req.prompt == "A test video"

    def test_minimax_models_enum(self):
        from comfy_api_nodes.apis.minimax import MiniMaxModel
        assert MiniMaxModel.T2V_01.value == "T2V-01"
        assert MiniMaxModel.Hailuo_02.value == "MiniMax-Hailuo-02"