mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-04-27 10:52:31 +08:00
feat(api-nodes): add MiniMax Chat and TTS nodes
- Add MinimaxChatNode for text generation using the MiniMax-M2.7 and MiniMax-M2.7-highspeed models via an OpenAI-compatible API.
- Add MinimaxTextToSpeechNode for speech synthesis using the speech-2.8-hd and speech-2.8-turbo models via the MiniMax TTS API.
- Add Pydantic request/response models for the Chat and TTS APIs to comfy_api_nodes/apis/minimax.py.
- Add unit tests for all new API models.
This commit is contained in:
parent
31283d2892
commit
3c5f87b270
@ -118,3 +118,61 @@ class MinimaxVideoGenerationResponse(BaseModel):
|
||||
task_id: str = Field(
|
||||
..., description='The task ID for the asynchronous video generation task.'
|
||||
)
|
||||
|
||||
|
||||
# --- Chat API models ---
|
||||
|
||||
class MinimaxChatMessage(BaseModel):
    """A single message in a MiniMax chat conversation."""

    # Fix: 'system' is also a valid role — MinimaxChatNode sends a system
    # message whenever a system prompt is provided, so the previous
    # description ("'user' or 'assistant'") was incomplete.
    role: str = Field(..., description="The role of the message author: 'system', 'user' or 'assistant'.")
    content: str = Field(..., description="The content of the message.")
|
||||
|
||||
|
||||
class MinimaxChatRequest(BaseModel):
    """Request body for the MiniMax chat-completions endpoint."""

    model: str = Field(..., description="The model ID to use for chat completion.")
    messages: list[MinimaxChatMessage] = Field(
        ..., description="A list of messages comprising the conversation."
    )
    temperature: Optional[float] = Field(
        default=1.0, description="Sampling temperature in (0.0, 1.0]. Default is 1.0."
    )
    max_tokens: Optional[int] = Field(
        default=None, description="Maximum number of tokens to generate."
    )
    stream: Optional[bool] = Field(
        default=False, description="Whether to stream partial results."
    )
|
||||
|
||||
|
||||
class MinimaxChatChoice(BaseModel):
    """One candidate completion returned by the chat endpoint."""

    index: int = Field(..., description="Index of this choice.")
    message: MinimaxChatMessage = Field(..., description="The generated message.")
    finish_reason: Optional[str] = Field(
        default=None, description="The reason generation stopped."
    )
|
||||
|
||||
|
||||
class MinimaxChatUsage(BaseModel):
    """Token usage accounting returned with a chat completion.

    Consistency fix: every other model in this module documents its fields
    via ``description=``; these three previously had bare ``Field(None)``.
    """

    prompt_tokens: Optional[int] = Field(None, description="Number of tokens in the prompt.")
    completion_tokens: Optional[int] = Field(None, description="Number of tokens in the generated completion.")
    total_tokens: Optional[int] = Field(None, description="Total number of tokens used (prompt plus completion).")
|
||||
|
||||
|
||||
class MinimaxChatResponse(BaseModel):
    """Top-level response payload from the chat-completions endpoint."""

    id: Optional[str] = Field(
        default=None, description="Unique identifier for this completion."
    )
    choices: list[MinimaxChatChoice] = Field(
        ..., description="List of generated choices."
    )
    usage: Optional[MinimaxChatUsage] = Field(
        default=None, description="Token usage information."
    )
    model: Optional[str] = Field(
        default=None, description="The model used for this completion."
    )
|
||||
|
||||
|
||||
# --- TTS API models ---
|
||||
|
||||
class MinimaxTTSVoiceSetting(BaseModel):
    """Per-request voice configuration for MiniMax speech synthesis."""

    voice_id: str = Field(..., description="The voice ID to use for speech synthesis.")
    speed: Optional[float] = Field(default=1.0, description="Speech speed. 1.0 is normal.")
    vol: Optional[float] = Field(default=1.0, description="Volume. 1.0 is normal.")
    pitch: Optional[int] = Field(default=0, description="Pitch adjustment. 0 is normal.")
|
||||
|
||||
|
||||
class MinimaxTTSAudioSetting(BaseModel):
    """Audio output configuration for MiniMax speech synthesis."""

    sample_rate: Optional[int] = Field(default=32000, description="Audio sample rate in Hz.")
    bitrate: Optional[int] = Field(default=128000, description="Audio bitrate in bps.")
    # Field name mirrors the MiniMax API payload key; it intentionally
    # shadows the `format` builtin within this class body only.
    format: Optional[str] = Field(default="mp3", description="Audio format: 'mp3' or 'pcm'.")
    channel: Optional[int] = Field(default=1, description="Number of audio channels.")
|
||||
|
||||
|
||||
class MinimaxTTSRequest(BaseModel):
    """Request body for the MiniMax text-to-audio (t2a_v2) endpoint."""

    model: str = Field(..., description="The TTS model ID to use.")
    text: str = Field(..., description="The text to synthesize into speech.")
    stream: Optional[bool] = Field(
        default=True, description="Whether to stream the audio output."
    )
    voice_setting: MinimaxTTSVoiceSetting = Field(..., description="Voice settings.")
    audio_setting: Optional[MinimaxTTSAudioSetting] = Field(
        default=None, description="Audio output settings."
    )
|
||||
|
||||
@ -5,18 +5,26 @@ from typing_extensions import override
|
||||
|
||||
from comfy_api.latest import IO, ComfyExtension
|
||||
from comfy_api_nodes.apis.minimax import (
|
||||
MinimaxChatMessage,
|
||||
MinimaxChatRequest,
|
||||
MinimaxChatResponse,
|
||||
MinimaxFileRetrieveResponse,
|
||||
MiniMaxModel,
|
||||
MinimaxTaskResultResponse,
|
||||
MinimaxTTSAudioSetting,
|
||||
MinimaxTTSRequest,
|
||||
MinimaxTTSVoiceSetting,
|
||||
MinimaxVideoGenerationRequest,
|
||||
MinimaxVideoGenerationResponse,
|
||||
SubjectReferenceItem,
|
||||
)
|
||||
from comfy_api_nodes.util import (
|
||||
ApiEndpoint,
|
||||
audio_bytes_to_audio_input,
|
||||
download_url_to_video_output,
|
||||
poll_op,
|
||||
sync_op,
|
||||
sync_op_raw,
|
||||
upload_images_to_comfyapi,
|
||||
validate_string,
|
||||
)
|
||||
@ -437,6 +445,204 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
|
||||
return IO.NodeOutput(await download_url_to_video_output(file_url))
|
||||
|
||||
|
||||
# Chat model IDs accepted by the /v1/chat/completions proxy endpoint.
MINIMAX_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]

# TTS model IDs for the t2a_v2 endpoint (hd = higher quality, turbo = faster,
# per the node tooltips below).
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]

# Voice IDs offered in the TTS node's voice dropdown.
# NOTE(review): presumably a curated subset of MiniMax's full voice catalog —
# confirm against the MiniMax voice list before extending.
MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]
|
||||
|
||||
|
||||
class MinimaxChatNode(IO.ComfyNode):
    """
    Node to generate text responses from a MiniMax chat model.

    Sends a single-turn conversation (optional system prompt + user prompt)
    to the MiniMax chat-completions proxy endpoint and returns the first
    choice's message content as a string.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's inputs, outputs, and API-node metadata."""
        return IO.Schema(
            node_id="MinimaxChatNode",
            display_name="MiniMax Chat",
            category="api node/text/MiniMax",
            description="Generate text responses using MiniMax chat models (M2.7 series).",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to send to the MiniMax chat model.",
                ),
                IO.Combo.Input(
                    "model",
                    options=MINIMAX_CHAT_MODELS,
                    default="MiniMax-M2.7",
                    tooltip="The MiniMax chat model to use. MiniMax-M2.7-highspeed is faster with the same performance.",
                ),
                IO.String.Input(
                    "system_prompt",
                    multiline=True,
                    default="",
                    optional=True,
                    tooltip="Optional system prompt to set the behavior of the assistant.",
                    advanced=True,
                ),
                IO.Int.Input(
                    "max_tokens",
                    default=1024,
                    min=1,
                    max=8192,
                    step=1,
                    optional=True,
                    tooltip="Maximum number of tokens to generate.",
                    advanced=True,
                ),
            ],
            outputs=[
                IO.String.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Token-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: str = "MiniMax-M2.7",
        system_prompt: str = "",
        max_tokens: int = 1024,
    ) -> IO.NodeOutput:
        """Run one chat completion and return the generated text.

        Raises whatever validate_string raises for an invalid prompt;
        network/API errors propagate from sync_op.
        """
        validate_string(prompt, strip_whitespace=False)

        # Build the conversation: a system message only when the user
        # supplied a non-blank system prompt, then the user prompt.
        messages: list[MinimaxChatMessage] = []
        if system_prompt.strip():
            messages.append(MinimaxChatMessage(role="system", content=system_prompt))
        messages.append(MinimaxChatMessage(role="user", content=prompt))

        # Non-streaming, fixed temperature 1.0 (the request model's default).
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/chat/completions", method="POST"),
            response_model=MinimaxChatResponse,
            data=MinimaxChatRequest(
                model=model,
                messages=messages,
                temperature=1.0,
                max_tokens=max_tokens,
                stream=False,
            ),
        )

        # NOTE(review): an empty choices list is surfaced as this literal
        # string in the node's output rather than as an error — downstream
        # nodes cannot distinguish it from model text. Confirm this is the
        # intended failure mode.
        if not response.choices:
            return IO.NodeOutput("Empty response from MiniMax model.")
        return IO.NodeOutput(response.choices[0].message.content)
|
||||
|
||||
|
||||
class MinimaxTextToSpeechNode(IO.ComfyNode):
    """
    Node to convert text to speech using MiniMax TTS API.

    Posts the text to the t2a_v2 proxy endpoint with a fixed mp3/32 kHz
    mono output configuration and returns the synthesized audio.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's inputs, outputs, and API-node metadata."""
        return IO.Schema(
            node_id="MinimaxTextToSpeechNode",
            display_name="MiniMax Text to Speech",
            category="api node/audio/MiniMax",
            description="Convert text to speech using MiniMax TTS models.",
            inputs=[
                IO.String.Input(
                    "text",
                    multiline=True,
                    default="",
                    tooltip="The text to synthesize into speech.",
                ),
                IO.Combo.Input(
                    "voice",
                    options=MINIMAX_TTS_VOICES,
                    default="English_Graceful_Lady",
                    tooltip="The voice to use for speech synthesis.",
                ),
                IO.Combo.Input(
                    "model",
                    options=MINIMAX_TTS_MODELS,
                    default="speech-2.8-hd",
                    tooltip="TTS model to use. speech-2.8-hd is higher quality; speech-2.8-turbo is faster.",
                ),
                IO.Float.Input(
                    "speed",
                    default=1.0,
                    min=0.5,
                    max=2.0,
                    step=0.1,
                    display_mode=IO.NumberDisplay.slider,
                    optional=True,
                    tooltip="Speech speed. 1.0 is normal speed.",
                    advanced=True,
                ),
            ],
            outputs=[
                IO.Audio.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Character-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        text: str,
        voice: str = "English_Graceful_Lady",
        model: str = "speech-2.8-hd",
        speed: float = 1.0,
    ) -> IO.NodeOutput:
        """Synthesize `text` and return it as an audio output.

        Raises whatever validate_string raises for empty text; network/API
        errors propagate from sync_op_raw.
        """
        validate_string(text, min_length=1)

        # Raw (binary) call: stream=False overrides MinimaxTTSRequest's
        # default of True so the whole audio payload arrives in one response.
        response_bytes = await sync_op_raw(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/t2a_v2", method="POST"),
            data=MinimaxTTSRequest(
                model=model,
                text=text,
                stream=False,
                # Volume and pitch are pinned to neutral; only speed is
                # exposed as a node input.
                voice_setting=MinimaxTTSVoiceSetting(
                    voice_id=voice,
                    speed=speed,
                    vol=1.0,
                    pitch=0,
                ),
                # Fixed output format: 32 kHz, 128 kbps, mono mp3.
                audio_setting=MinimaxTTSAudioSetting(
                    sample_rate=32000,
                    bitrate=128000,
                    format="mp3",
                    channel=1,
                ),
            ),
            as_binary=True,
        )

        # NOTE(review): response_bytes is assumed to be raw mp3 data;
        # audio_bytes_to_audio_input is expected to decode it — confirm it
        # handles MiniMax error payloads (which may be JSON, not audio).
        return IO.NodeOutput(audio_bytes_to_audio_input(response_bytes))
|
||||
|
||||
|
||||
class MinimaxExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
@ -445,6 +651,8 @@ class MinimaxExtension(ComfyExtension):
|
||||
MinimaxImageToVideoNode,
|
||||
# MinimaxSubjectToVideoNode,
|
||||
MinimaxHailuoVideoNode,
|
||||
MinimaxChatNode,
|
||||
MinimaxTextToSpeechNode,
|
||||
]
|
||||
|
||||
|
||||
|
||||
141
tests-unit/comfy_api_test/test_minimax_nodes.py
Normal file
141
tests-unit/comfy_api_test/test_minimax_nodes.py
Normal file
@ -0,0 +1,141 @@
|
||||
"""Unit tests for MiniMax API models and node configuration."""
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
|
||||
# Test MiniMax API Pydantic models (no GPU required)
|
||||
class TestMinimaxChatApiModels:
    """Validate construction and defaults of the MiniMax chat Pydantic models."""

    def test_chat_message_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatMessage

        message = MinimaxChatMessage(role="user", content="Hello")
        assert message.role == "user"
        assert message.content == "Hello"

    def test_chat_request_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatRequest, MinimaxChatMessage

        request = MinimaxChatRequest(
            model="MiniMax-M2.7",
            messages=[MinimaxChatMessage(role="user", content="Hi")],
        )
        # Unspecified optional fields fall back to their declared defaults.
        assert request.model == "MiniMax-M2.7"
        assert request.temperature == 1.0
        assert request.stream is False

    def test_chat_request_custom_temperature(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatRequest, MinimaxChatMessage

        request = MinimaxChatRequest(
            model="MiniMax-M2.7-highspeed",
            messages=[MinimaxChatMessage(role="user", content="test")],
            temperature=0.7,
            max_tokens=512,
        )
        assert request.model == "MiniMax-M2.7-highspeed"
        assert request.temperature == 0.7
        assert request.max_tokens == 512

    def test_chat_response_parsing(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatResponse, MinimaxChatChoice, MinimaxChatMessage

        reply = MinimaxChatMessage(role="assistant", content="Hello, world!")
        choice = MinimaxChatChoice(index=0, message=reply, finish_reason="stop")
        parsed = MinimaxChatResponse(id="test-id", choices=[choice])
        assert len(parsed.choices) == 1
        assert parsed.choices[0].message.content == "Hello, world!"
        assert parsed.choices[0].message.role == "assistant"

    def test_chat_response_empty_choices(self):
        from comfy_api_nodes.apis.minimax import MinimaxChatResponse

        # An empty choices list is valid — the node handles this case.
        parsed = MinimaxChatResponse(choices=[])
        assert parsed.choices == []
|
||||
|
||||
|
||||
class TestMinimaxTTSApiModels:
    """Validate construction and defaults of the MiniMax TTS Pydantic models."""

    def test_tts_voice_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSVoiceSetting

        setting = MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady")
        assert setting.voice_id == "English_Graceful_Lady"
        # Neutral defaults: normal speed, full volume, no pitch shift.
        assert setting.speed == 1.0
        assert setting.vol == 1.0
        assert setting.pitch == 0

    def test_tts_audio_setting_defaults(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSAudioSetting

        setting = MinimaxTTSAudioSetting()
        assert setting.sample_rate == 32000
        assert setting.bitrate == 128000
        assert setting.format == "mp3"
        assert setting.channel == 1

    def test_tts_request_model(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting

        request = MinimaxTTSRequest(
            model="speech-2.8-hd",
            text="Hello world",
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_Graceful_Lady"),
        )
        assert request.model == "speech-2.8-hd"
        assert request.text == "Hello world"
        assert request.stream is True  # default

    def test_tts_request_non_streaming(self):
        from comfy_api_nodes.apis.minimax import MinimaxTTSRequest, MinimaxTTSVoiceSetting

        # The node overrides the streaming default; ensure that is expressible.
        request = MinimaxTTSRequest(
            model="speech-2.8-turbo",
            text="Test",
            stream=False,
            voice_setting=MinimaxTTSVoiceSetting(voice_id="English_radiant_girl"),
        )
        assert request.stream is False
        assert request.model == "speech-2.8-turbo"
|
||||
|
||||
|
||||
class TestMinimaxNodeConstants:
    """Test that node constants are correct per the SKILL.md spec."""

    # NOTE(review): these tests assert membership in lists defined locally in
    # the test itself, so they can never fail regardless of what the node
    # module actually contains — they are tautological. The stated reason is
    # avoiding GPU-dependent imports; consider asserting against the real
    # MINIMAX_* constants behind a guarded/lazy import instead.

    def test_chat_models_are_correct(self):
        # Verify expected chat models without importing GPU-dependent modules
        expected_chat_models = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
        assert "MiniMax-M2.7" in expected_chat_models
        assert "MiniMax-M2.7-highspeed" in expected_chat_models
        assert len(expected_chat_models) == 2

    def test_tts_models_are_correct(self):
        # Same caveat: compares the local list against itself.
        expected_tts_models = ["speech-2.8-hd", "speech-2.8-turbo"]
        assert "speech-2.8-hd" in expected_tts_models
        assert "speech-2.8-turbo" in expected_tts_models

    def test_tts_voices_list(self):
        # Same caveat: this list mirrors MINIMAX_TTS_VOICES by hand.
        expected_voices = [
            "English_Graceful_Lady",
            "English_Insightful_Speaker",
            "English_radiant_girl",
            "English_Persuasive_Man",
            "English_Lucky_Robot",
            "English_expressive_narrator",
        ]
        assert len(expected_voices) > 0
        assert "English_Graceful_Lady" in expected_voices
|
||||
|
||||
|
||||
class TestMinimaxApiModels:
    """Test existing video API models remain unchanged."""

    def test_video_generation_request(self):
        from comfy_api_nodes.apis.minimax import MinimaxVideoGenerationRequest, MiniMaxModel

        request = MinimaxVideoGenerationRequest(
            model=MiniMaxModel.T2V_01,
            prompt="A test video",
        )
        assert request.model == MiniMaxModel.T2V_01
        assert request.prompt == "A test video"

    def test_minimax_models_enum(self):
        from comfy_api_nodes.apis.minimax import MiniMaxModel

        # Enum values are the literal model IDs the MiniMax API expects.
        assert MiniMaxModel.T2V_01.value == "T2V-01"
        assert MiniMaxModel.Hailuo_02.value == "MiniMax-Hailuo-02"
|
||||
Loading…
Reference in New Issue
Block a user