feat(api-nodes): add MiniMax Chat and TTS nodes

- Add MinimaxChatNode for text generation using MiniMax-M2.7 and
  MiniMax-M2.7-highspeed models via OpenAI-compatible API
- Add MinimaxTextToSpeechNode for speech synthesis using speech-2.8-hd
  and speech-2.8-turbo models via MiniMax TTS API
- Add Pydantic request/response models for Chat and TTS APIs to
  comfy_api_nodes/apis/minimax.py
- Add unit tests for all new API models
This commit is contained in:
octo-patch 2026-04-12 18:52:48 +08:00
parent 31283d2892
commit 3c5f87b270
3 changed files with 407 additions and 0 deletions

View File

@ -118,3 +118,61 @@ class MinimaxVideoGenerationResponse(BaseModel):
task_id: str = Field(
..., description='The task ID for the asynchronous video generation task.'
)
# --- Chat API models ---
class MinimaxChatMessage(BaseModel):
    """A single message in a MiniMax chat conversation."""

    # Fix: 'system' was missing from the description — MinimaxChatNode sends a
    # role="system" message whenever a system_prompt is provided.
    role: str = Field(
        ...,
        description="The role of the message author: 'system', 'user' or 'assistant'.",
    )
    content: str = Field(..., description="The content of the message.")
class MinimaxChatRequest(BaseModel):
    """Request payload for the MiniMax chat-completions endpoint."""

    model: str = Field(..., description="The model ID to use for chat completion.")
    messages: list[MinimaxChatMessage] = Field(
        ..., description="A list of messages comprising the conversation."
    )
    temperature: Optional[float] = Field(
        1.0, description="Sampling temperature in (0.0, 1.0]. Default is 1.0."
    )
    max_tokens: Optional[int] = Field(
        None, description="Maximum number of tokens to generate."
    )
    stream: Optional[bool] = Field(
        False, description="Whether to stream partial results."
    )
class MinimaxChatChoice(BaseModel):
    """One generated completion choice in a chat response."""

    index: int = Field(..., description="Index of this choice.")
    message: MinimaxChatMessage = Field(..., description="The generated message.")
    finish_reason: Optional[str] = Field(
        None, description="The reason generation stopped."
    )
class MinimaxChatUsage(BaseModel):
    """Token accounting reported by the chat API.

    Consistency fix: every other model in this section documents its fields;
    these three previously had bare ``Field(None)`` with no description.
    """

    prompt_tokens: Optional[int] = Field(
        None, description="Number of tokens in the prompt."
    )
    completion_tokens: Optional[int] = Field(
        None, description="Number of tokens in the generated completion."
    )
    total_tokens: Optional[int] = Field(
        None, description="Total number of tokens used (prompt + completion)."
    )
class MinimaxChatResponse(BaseModel):
    """Response payload returned by the MiniMax chat-completions endpoint."""

    id: Optional[str] = Field(
        None, description="Unique identifier for this completion."
    )
    choices: list[MinimaxChatChoice] = Field(
        ..., description="List of generated choices."
    )
    usage: Optional[MinimaxChatUsage] = Field(
        None, description="Token usage information."
    )
    model: Optional[str] = Field(
        None, description="The model used for this completion."
    )
# --- TTS API models ---
class MinimaxTTSVoiceSetting(BaseModel):
    """Voice configuration for a TTS request."""

    voice_id: str = Field(
        ..., description="The voice ID to use for speech synthesis."
    )
    speed: Optional[float] = Field(1.0, description="Speech speed. 1.0 is normal.")
    vol: Optional[float] = Field(1.0, description="Volume. 1.0 is normal.")
    pitch: Optional[int] = Field(0, description="Pitch adjustment. 0 is normal.")
class MinimaxTTSAudioSetting(BaseModel):
    """Audio-output configuration for a TTS request."""

    sample_rate: Optional[int] = Field(32000, description="Audio sample rate in Hz.")
    bitrate: Optional[int] = Field(128000, description="Audio bitrate in bps.")
    # NOTE: field name shadows the builtin `format`, but it must match the
    # wire-format key expected by the MiniMax API.
    format: Optional[str] = Field("mp3", description="Audio format: 'mp3' or 'pcm'.")
    channel: Optional[int] = Field(1, description="Number of audio channels.")
class MinimaxTTSRequest(BaseModel):
    """Request payload for the MiniMax text-to-speech endpoint."""

    model: str = Field(..., description="The TTS model ID to use.")
    text: str = Field(..., description="The text to synthesize into speech.")
    stream: Optional[bool] = Field(
        True, description="Whether to stream the audio output."
    )
    voice_setting: MinimaxTTSVoiceSetting = Field(
        ..., description="Voice settings."
    )
    audio_setting: Optional[MinimaxTTSAudioSetting] = Field(
        None, description="Audio output settings."
    )

View File

@ -5,18 +5,26 @@ from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension
from comfy_api_nodes.apis.minimax import (
MinimaxChatMessage,
MinimaxChatRequest,
MinimaxChatResponse,
MinimaxFileRetrieveResponse,
MiniMaxModel,
MinimaxTaskResultResponse,
MinimaxTTSAudioSetting,
MinimaxTTSRequest,
MinimaxTTSVoiceSetting,
MinimaxVideoGenerationRequest,
MinimaxVideoGenerationResponse,
SubjectReferenceItem,
)
from comfy_api_nodes.util import (
ApiEndpoint,
audio_bytes_to_audio_input,
download_url_to_video_output,
poll_op,
sync_op,
sync_op_raw,
upload_images_to_comfyapi,
validate_string,
)
@ -437,6 +445,204 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
return IO.NodeOutput(await download_url_to_video_output(file_url))
# Option lists surfaced in the Combo inputs of the nodes below.
# Order matters: the first/declared default entry is what the UI preselects.
MINIMAX_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
# Voice IDs passed through as MinimaxTTSVoiceSetting.voice_id.
MINIMAX_TTS_VOICES = [
"English_Graceful_Lady",
"English_Insightful_Speaker",
"English_radiant_girl",
"English_Persuasive_Man",
"English_Lucky_Robot",
"English_expressive_narrator",
]
class MinimaxChatNode(IO.ComfyNode):
    """Generate text responses from a MiniMax chat model."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's id, inputs, outputs, hidden fields and pricing."""
        inputs = [
            IO.String.Input(
                "prompt",
                multiline=True,
                default="",
                tooltip="Text prompt to send to the MiniMax chat model.",
            ),
            IO.Combo.Input(
                "model",
                options=MINIMAX_CHAT_MODELS,
                default="MiniMax-M2.7",
                tooltip="The MiniMax chat model to use. MiniMax-M2.7-highspeed is faster with the same performance.",
            ),
            IO.String.Input(
                "system_prompt",
                multiline=True,
                default="",
                optional=True,
                tooltip="Optional system prompt to set the behavior of the assistant.",
                advanced=True,
            ),
            IO.Int.Input(
                "max_tokens",
                default=1024,
                min=1,
                max=8192,
                step=1,
                optional=True,
                tooltip="Maximum number of tokens to generate.",
                advanced=True,
            ),
        ]
        return IO.Schema(
            node_id="MinimaxChatNode",
            display_name="MiniMax Chat",
            category="api node/text/MiniMax",
            description="Generate text responses using MiniMax chat models (M2.7 series).",
            inputs=inputs,
            outputs=[IO.String.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Token-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: str = "MiniMax-M2.7",
        system_prompt: str = "",
        max_tokens: int = 1024,
    ) -> IO.NodeOutput:
        """Call the MiniMax chat-completions proxy and return the reply text."""
        validate_string(prompt, strip_whitespace=False)
        # Build the conversation: user message always present, system message
        # (if any) prepended so it frames the whole exchange.
        conversation = [MinimaxChatMessage(role="user", content=prompt)]
        if system_prompt.strip():
            conversation.insert(
                0, MinimaxChatMessage(role="system", content=system_prompt)
            )
        request = MinimaxChatRequest(
            model=model,
            messages=conversation,
            temperature=1.0,
            max_tokens=max_tokens,
            stream=False,
        )
        result = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/chat/completions", method="POST"),
            response_model=MinimaxChatResponse,
            data=request,
        )
        if result.choices:
            return IO.NodeOutput(result.choices[0].message.content)
        # Best-effort: surface an explanatory string rather than raising.
        return IO.NodeOutput("Empty response from MiniMax model.")
class MinimaxTextToSpeechNode(IO.ComfyNode):
    """Convert text to speech using the MiniMax TTS API."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's id, inputs, outputs, hidden fields and pricing."""
        inputs = [
            IO.String.Input(
                "text",
                multiline=True,
                default="",
                tooltip="The text to synthesize into speech.",
            ),
            IO.Combo.Input(
                "voice",
                options=MINIMAX_TTS_VOICES,
                default="English_Graceful_Lady",
                tooltip="The voice to use for speech synthesis.",
            ),
            IO.Combo.Input(
                "model",
                options=MINIMAX_TTS_MODELS,
                default="speech-2.8-hd",
                tooltip="TTS model to use. speech-2.8-hd is higher quality; speech-2.8-turbo is faster.",
            ),
            IO.Float.Input(
                "speed",
                default=1.0,
                min=0.5,
                max=2.0,
                step=0.1,
                display_mode=IO.NumberDisplay.slider,
                optional=True,
                tooltip="Speech speed. 1.0 is normal speed.",
                advanced=True,
            ),
        ]
        return IO.Schema(
            node_id="MinimaxTextToSpeechNode",
            display_name="MiniMax Text to Speech",
            category="api node/audio/MiniMax",
            description="Convert text to speech using MiniMax TTS models.",
            inputs=inputs,
            outputs=[IO.Audio.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Character-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        text: str,
        voice: str = "English_Graceful_Lady",
        model: str = "speech-2.8-hd",
        speed: float = 1.0,
    ) -> IO.NodeOutput:
        """Synthesize `text` with the chosen voice/model and return audio."""
        validate_string(text, min_length=1)
        voice_cfg = MinimaxTTSVoiceSetting(
            voice_id=voice,
            speed=speed,
            vol=1.0,
            pitch=0,
        )
        # Fixed output settings: 32 kHz mono mp3 at 128 kbps.
        audio_cfg = MinimaxTTSAudioSetting(
            sample_rate=32000,
            bitrate=128000,
            format="mp3",
            channel=1,
        )
        payload = MinimaxTTSRequest(
            model=model,
            text=text,
            stream=False,
            voice_setting=voice_cfg,
            audio_setting=audio_cfg,
        )
        # Raw binary response: the proxy returns the encoded audio bytes.
        raw_audio = await sync_op_raw(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/t2a_v2", method="POST"),
            data=payload,
            as_binary=True,
        )
        return IO.NodeOutput(audio_bytes_to_audio_input(raw_audio))
class MinimaxExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -445,6 +651,8 @@ class MinimaxExtension(ComfyExtension):
MinimaxImageToVideoNode,
# MinimaxSubjectToVideoNode,
MinimaxHailuoVideoNode,
MinimaxChatNode,
MinimaxTextToSpeechNode,
]

View File

@ -0,0 +1,141 @@
"""Unit tests for MiniMax API models and node configuration."""
import pytest
from unittest.mock import MagicMock
# Test MiniMax API Pydantic models (no GPU required)
class TestMinimaxChatApiModels:
    """Exercise the Chat API Pydantic models (no GPU required)."""

    def test_chat_message_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage

        message = MinimaxChatMessage(role="user", content="Hello")
        assert message.role == "user"
        assert message.content == "Hello"

    def test_chat_request_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage, MinimaxChatRequest

        request = MinimaxChatRequest(
            model="MiniMax-M2.7",
            messages=[MinimaxChatMessage(role="user", content="Hi")],
        )
        assert request.model == "MiniMax-M2.7"
        assert request.temperature == 1.0
        assert request.stream is False

    def test_chat_request_custom_temperature(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage, MinimaxChatRequest

        request = MinimaxChatRequest(
            model="MiniMax-M2.7-highspeed",
            messages=[MinimaxChatMessage(role="user", content="test")],
            temperature=0.7,
            max_tokens=512,
        )
        assert request.model == "MiniMax-M2.7-highspeed"
        assert request.temperature == 0.7
        assert request.max_tokens == 512

    def test_chat_response_parsing(self):
        from comfy_api_nodes.apis.minimax import (
            MinimaxChatChoice,
            MinimaxChatMessage,
            MinimaxChatResponse,
        )

        reply = MinimaxChatMessage(role="assistant", content="Hello, world!")
        response = MinimaxChatResponse(
            id="test-id",
            choices=[MinimaxChatChoice(index=0, message=reply, finish_reason="stop")],
        )
        assert len(response.choices) == 1
        assert response.choices[0].message.content == "Hello, world!"
        assert response.choices[0].message.role == "assistant"

    def test_chat_response_empty_choices(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatResponse

        response = MinimaxChatResponse(choices=[])
        assert response.choices == []
class TestMinimaxTTSApiModels:
    """Exercise the TTS API Pydantic models (no GPU required)."""

    def test_tts_voice_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSVoiceSetting

        voice = MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady")
        assert voice.voice_id == "English_Graceful_Lady"
        assert voice.speed == 1.0
        assert voice.vol == 1.0
        assert voice.pitch == 0

    def test_tts_audio_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSAudioSetting

        audio = MinimaxTTSAudioSetting()
        assert audio.sample_rate == 32000
        assert audio.bitrate == 128000
        assert audio.format == "mp3"
        assert audio.channel == 1

    def test_tts_request_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting

        request = MinimaxTTSRequest(
            model="speech-2.8-hd",
            text="Hello world",
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady"),
        )
        assert request.model == "speech-2.8-hd"
        assert request.text == "Hello world"
        assert request.stream is True  # default

    def test_tts_request_non_streaming(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting

        request = MinimaxTTSRequest(
            model="speech-2.8-turbo",
            text="Test",
            stream=False,
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_radiant_girl"),
        )
        assert request.stream is False
        assert request.model == "speech-2.8-turbo"
class TestMinimaxNodeConstants:
    """Sanity checks for the MiniMax node constants.

    NOTE(review): the original tests asserted membership in the very list
    defined two lines above — a tautology that could never fail. These
    versions still avoid importing the (GPU-dependent) node module, but at
    least validate internal consistency of the expected constants: no
    duplicates, non-empty string entries, and the expected counts.
    TODO: once the node module is importable in CI, compare these lists
    against MINIMAX_CHAT_MODELS / MINIMAX_TTS_MODELS / MINIMAX_TTS_VOICES.
    """

    # Expected values, kept in sync with the node module by hand.
    EXPECTED_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
    EXPECTED_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
    EXPECTED_TTS_VOICES = [
        "English_Graceful_Lady",
        "English_Insightful_Speaker",
        "English_radiant_girl",
        "English_Persuasive_Man",
        "English_Lucky_Robot",
        "English_expressive_narrator",
    ]

    def test_chat_models_are_correct(self):
        models = self.EXPECTED_CHAT_MODELS
        assert len(models) == 2
        assert len(set(models)) == len(models)  # no duplicates
        assert all(isinstance(m, str) and m for m in models)
        assert "MiniMax-M2.7" in models
        assert "MiniMax-M2.7-highspeed" in models

    def test_tts_models_are_correct(self):
        models = self.EXPECTED_TTS_MODELS
        assert len(models) == 2
        assert len(set(models)) == len(models)
        assert "speech-2.8-hd" in models
        assert "speech-2.8-turbo" in models

    def test_tts_voices_list(self):
        voices = self.EXPECTED_TTS_VOICES
        assert len(voices) == 6
        assert len(set(voices)) == len(voices)
        assert all(isinstance(v, str) and v for v in voices)
        # The default Combo selection must be present in the options.
        assert "English_Graceful_Lady" in voices
class TestMinimaxApiModels:
    """Regression checks: the pre-existing video API models are unchanged."""

    def test_video_generation_request(self):
        from comfy_api_nodes.apis.minimax import MiniMaxModel, MinimaxVideoGenerationRequest

        request = MinimaxVideoGenerationRequest(
            model=MiniMaxModel.T2V_01,
            prompt="A test video",
        )
        assert request.model == MiniMaxModel.T2V_01
        assert request.prompt == "A test video"

    def test_minimax_models_enum(self):
        from comfy_api_nodes.apis.minimax import MiniMaxModel

        assert MiniMaxModel.T2V_01.value == "T2V-01"
        assert MiniMaxModel.Hailuo_02.value == "MiniMax-Hailuo-02"