"""API nodes for MiniMax: video generation (T2V/I2V/S2V/Hailuo), chat, and text-to-speech."""

from typing import Optional

import torch
from typing_extensions import override

from comfy_api.latest import IO, ComfyExtension
from comfy_api_nodes.apis.minimax import (
    MinimaxChatMessage,
    MinimaxChatRequest,
    MinimaxChatResponse,
    MinimaxFileRetrieveResponse,
    MiniMaxModel,
    MinimaxTaskResultResponse,
    MinimaxTTSAudioSetting,
    MinimaxTTSRequest,
    MinimaxTTSVoiceSetting,
    MinimaxVideoGenerationRequest,
    MinimaxVideoGenerationResponse,
    SubjectReferenceItem,
)
from comfy_api_nodes.util import (
    ApiEndpoint,
    audio_bytes_to_audio_input,
    download_url_to_video_output,
    poll_op,
    sync_op,
    sync_op_raw,
    upload_images_to_comfyapi,
    validate_string,
)

# Empirical average generation times (seconds) used for progress estimation.
I2V_AVERAGE_DURATION = 114
T2V_AVERAGE_DURATION = 234


async def _poll_and_fetch_video(
    cls: type[IO.ComfyNode],
    task_id: str,
    average_duration: Optional[int],
) -> IO.NodeOutput:
    """Poll a MiniMax video-generation task until completion and download the result.

    Shared tail of every MiniMax video node: query the task until it finishes,
    resolve the produced file ID to a download URL, and fetch the video
    (falling back to the backup URL if the primary download fails).

    Args:
        cls: The calling node class (carries auth context for API operations).
        task_id: MiniMax task identifier returned by the generation request.
        average_duration: Estimated generation time in seconds, for progress display.

    Returns:
        IO.NodeOutput wrapping the downloaded video.

    Raises:
        Exception: If the task yields no file ID or no download URL.
    """
    task_result = await poll_op(
        cls,
        ApiEndpoint(path="/proxy/minimax/query/video_generation", query_params={"task_id": task_id}),
        response_model=MinimaxTaskResultResponse,
        status_extractor=lambda x: x.status.value,
        estimated_duration=average_duration,
    )

    file_id = task_result.file_id
    if file_id is None:
        raise Exception("Request was not successful. Missing file ID.")

    file_result = await sync_op(
        cls,
        ApiEndpoint(path="/proxy/minimax/files/retrieve", query_params={"file_id": int(file_id)}),
        response_model=MinimaxFileRetrieveResponse,
    )

    file_url = file_result.file.download_url
    if file_url is None:
        raise Exception(f"No video was found in the response. Full response: {file_result.model_dump()}")

    if file_result.file.backup_download_url:
        try:
            return IO.NodeOutput(await download_url_to_video_output(file_url, timeout=10, max_retries=2))
        except Exception:
            # if we have a second URL to retrieve the result, try again using that one
            return IO.NodeOutput(
                await download_url_to_video_output(file_result.file.backup_download_url, max_retries=3)
            )
    return IO.NodeOutput(await download_url_to_video_output(file_url))


async def _generate_mm_video(
    cls: type[IO.ComfyNode],
    *,
    prompt_text: str,
    seed: int,
    model: str,
    image: Optional[torch.Tensor] = None,  # used for ImageToVideo
    subject: Optional[torch.Tensor] = None,  # used for SubjectToVideo
    average_duration: Optional[int] = None,
) -> IO.NodeOutput:
    """Submit a MiniMax video-generation request and return the finished video.

    Args:
        cls: The calling node class (carries auth context for API operations).
        prompt_text: Text prompt guiding the generation; required when no image is given.
        seed: Node seed (accepted for interface parity; not forwarded to the API).
        model: MiniMax model identifier (e.g. "T2V-01", "I2V-01", "S2V-01").
        image: Optional first-frame image tensor (ImageToVideo).
        subject: Optional subject-reference image tensor (SubjectToVideo).
        average_duration: Estimated generation time in seconds, for progress display.
    """
    if image is None:
        # Pure text-to-video requires a non-empty prompt.
        validate_string(prompt_text, field_name="prompt_text")

    image_url = None
    if image is not None:
        image_url = (await upload_images_to_comfyapi(cls, image, max_images=1))[0]

    # TODO: figure out how to deal with subject properly, API returns invalid params when using S2V-01 model
    subject_reference = None
    if subject is not None:
        subject_url = (await upload_images_to_comfyapi(cls, subject, max_images=1))[0]
        subject_reference = [SubjectReferenceItem(image=subject_url)]

    response = await sync_op(
        cls,
        ApiEndpoint(path="/proxy/minimax/video_generation", method="POST"),
        response_model=MinimaxVideoGenerationResponse,
        data=MinimaxVideoGenerationRequest(
            model=MiniMaxModel(model),
            prompt=prompt_text,
            callback_url=None,
            first_frame_image=image_url,
            subject_reference=subject_reference,
            prompt_optimizer=None,
        ),
    )

    task_id = response.task_id
    if not task_id:
        raise Exception(f"MiniMax generation failed: {response.base_resp}")

    return await _poll_and_fetch_video(cls, task_id, average_duration)


class MinimaxTextToVideoNode(IO.ComfyNode):
    """Generate a video from a text prompt using MiniMax T2V models."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxTextToVideoNode",
            display_name="MiniMax Text to Video",
            category="api node/video/MiniMax",
            description="Generates videos synchronously based on a prompt, and optional parameters.",
            inputs=[
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation",
                ),
                IO.Combo.Input(
                    "model",
                    options=["T2V-01", "T2V-01-Director"],
                    default="T2V-01",
                    tooltip="Model to use for video generation",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"usd","usd":0.43}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt_text: str,
        model: str = "T2V-01",
        seed: int = 0,
    ) -> IO.NodeOutput:
        return await _generate_mm_video(
            cls,
            prompt_text=prompt_text,
            seed=seed,
            model=model,
            image=None,
            subject=None,
            average_duration=T2V_AVERAGE_DURATION,
        )


class MinimaxImageToVideoNode(IO.ComfyNode):
    """Generate a video from a first-frame image plus prompt using MiniMax I2V models."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxImageToVideoNode",
            display_name="MiniMax Image to Video",
            category="api node/video/MiniMax",
            description="Generates videos synchronously based on an image and prompt, and optional parameters.",
            inputs=[
                IO.Image.Input(
                    "image",
                    tooltip="Image to use as first frame of video generation",
                ),
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation",
                ),
                IO.Combo.Input(
                    "model",
                    options=["I2V-01-Director", "I2V-01", "I2V-01-live"],
                    default="I2V-01",
                    tooltip="Model to use for video generation",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"usd","usd":0.43}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        image: torch.Tensor,
        prompt_text: str,
        model: str = "I2V-01",
        seed: int = 0,
    ) -> IO.NodeOutput:
        return await _generate_mm_video(
            cls,
            prompt_text=prompt_text,
            seed=seed,
            model=model,
            image=image,
            subject=None,
            average_duration=I2V_AVERAGE_DURATION,
        )


class MinimaxSubjectToVideoNode(IO.ComfyNode):
    """Generate a video referencing a subject image using the MiniMax S2V model.

    NOTE(review): currently excluded from the extension's node list — see
    the TODO in `_generate_mm_video` about S2V-01 subject parameters.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxSubjectToVideoNode",
            display_name="MiniMax Subject to Video",
            category="api node/video/MiniMax",
            description="Generates videos synchronously based on an image and prompt, and optional parameters.",
            inputs=[
                IO.Image.Input(
                    "subject",
                    tooltip="Image of subject to reference for video generation",
                ),
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation",
                ),
                IO.Combo.Input(
                    "model",
                    options=["S2V-01"],
                    default="S2V-01",
                    tooltip="Model to use for video generation",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        subject: torch.Tensor,
        prompt_text: str,
        model: str = "S2V-01",
        seed: int = 0,
    ) -> IO.NodeOutput:
        return await _generate_mm_video(
            cls,
            prompt_text=prompt_text,
            seed=seed,
            model=model,
            image=None,
            subject=subject,
            average_duration=T2V_AVERAGE_DURATION,
        )


class MinimaxHailuoVideoNode(IO.ComfyNode):
    """Generate a video with the MiniMax Hailuo-02 model, with optional first frame."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxHailuoVideoNode",
            display_name="MiniMax Hailuo Video",
            category="api node/video/MiniMax",
            description="Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model.",
            inputs=[
                IO.String.Input(
                    "prompt_text",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to guide the video generation.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
                IO.Image.Input(
                    "first_frame_image",
                    tooltip="Optional image to use as the first frame to generate a video.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "prompt_optimizer",
                    default=True,
                    tooltip="Optimize prompt to improve generation quality when needed.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "duration",
                    options=[6, 10],
                    default=6,
                    tooltip="The length of the output video in seconds.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "resolution",
                    options=["768P", "1080P"],
                    default="768P",
                    tooltip="The dimensions of the video display. 1080p is 1920x1080, 768p is 1366x768.",
                    optional=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["resolution", "duration"]),
                expr="""
                (
                  $prices := {
                    "768p": {"6": 0.28, "10": 0.56},
                    "1080p": {"6": 0.49}
                  };
                  $resPrices := $lookup($prices, $lowercase(widgets.resolution));
                  $price := $lookup($resPrices, $string(widgets.duration));
                  {"type":"usd","usd": $price ? $price : 0.43}
                )
                """,
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt_text: str,
        seed: int = 0,
        first_frame_image: Optional[torch.Tensor] = None,  # used for ImageToVideo
        prompt_optimizer: bool = True,
        duration: int = 6,
        resolution: str = "768P",
        model: str = "MiniMax-Hailuo-02",
    ) -> IO.NodeOutput:
        if first_frame_image is None:
            # Pure text-to-video requires a non-empty prompt.
            validate_string(prompt_text, field_name="prompt_text")

        # API restriction: 1080P output only supports 6-second clips.
        if model == "MiniMax-Hailuo-02" and resolution.upper() == "1080P" and duration != 6:
            raise Exception(
                "When model is MiniMax-Hailuo-02 and resolution is 1080P, duration is limited to 6 seconds."
            )

        # upload image, if passed in
        image_url = None
        if first_frame_image is not None:
            image_url = (await upload_images_to_comfyapi(cls, first_frame_image, max_images=1))[0]

        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/video_generation", method="POST"),
            response_model=MinimaxVideoGenerationResponse,
            data=MinimaxVideoGenerationRequest(
                model=MiniMaxModel(model),
                prompt=prompt_text,
                callback_url=None,
                first_frame_image=image_url,
                prompt_optimizer=prompt_optimizer,
                duration=duration,
                resolution=resolution,
            ),
        )

        task_id = response.task_id
        if not task_id:
            raise Exception(f"MiniMax generation failed: {response.base_resp}")

        # Higher resolutions take roughly twice as long on average.
        average_duration = 120 if resolution == "768P" else 240
        return await _poll_and_fetch_video(cls, task_id, average_duration)


MINIMAX_CHAT_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]


class MinimaxChatNode(IO.ComfyNode):
    """
    Node to generate text responses from a MiniMax chat model.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxChatNode",
            display_name="MiniMax Chat",
            category="api node/text/MiniMax",
            description="Generate text responses using MiniMax chat models (M2.7 series).",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text prompt to send to the MiniMax chat model.",
                ),
                IO.Combo.Input(
                    "model",
                    options=MINIMAX_CHAT_MODELS,
                    default="MiniMax-M2.7",
                    tooltip="The MiniMax chat model to use. MiniMax-M2.7-highspeed is faster with the same performance.",
                ),
                IO.String.Input(
                    "system_prompt",
                    multiline=True,
                    default="",
                    optional=True,
                    tooltip="Optional system prompt to set the behavior of the assistant.",
                    advanced=True,
                ),
                IO.Int.Input(
                    "max_tokens",
                    default=1024,
                    min=1,
                    max=8192,
                    step=1,
                    optional=True,
                    tooltip="Maximum number of tokens to generate.",
                    advanced=True,
                ),
            ],
            outputs=[
                IO.String.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Token-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: str = "MiniMax-M2.7",
        system_prompt: str = "",
        max_tokens: int = 1024,
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=False)

        messages: list[MinimaxChatMessage] = []
        if system_prompt.strip():
            messages.append(MinimaxChatMessage(role="system", content=system_prompt))
        messages.append(MinimaxChatMessage(role="user", content=prompt))

        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/chat/completions", method="POST"),
            response_model=MinimaxChatResponse,
            data=MinimaxChatRequest(
                model=model,
                messages=messages,
                temperature=1.0,
                max_tokens=max_tokens,
                stream=False,
            ),
        )

        if not response.choices:
            return IO.NodeOutput("Empty response from MiniMax model.")
        return IO.NodeOutput(response.choices[0].message.content)


class MinimaxTextToSpeechNode(IO.ComfyNode):
    """
    Node to convert text to speech using MiniMax TTS API.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="MinimaxTextToSpeechNode",
            display_name="MiniMax Text to Speech",
            category="api node/audio/MiniMax",
            description="Convert text to speech using MiniMax TTS models.",
            inputs=[
                IO.String.Input(
                    "text",
                    multiline=True,
                    default="",
                    tooltip="The text to synthesize into speech.",
                ),
                IO.Combo.Input(
                    "voice",
                    options=MINIMAX_TTS_VOICES,
                    default="English_Graceful_Lady",
                    tooltip="The voice to use for speech synthesis.",
                ),
                IO.Combo.Input(
                    "model",
                    options=MINIMAX_TTS_MODELS,
                    default="speech-2.8-hd",
                    tooltip="TTS model to use. speech-2.8-hd is higher quality; speech-2.8-turbo is faster.",
                ),
                IO.Float.Input(
                    "speed",
                    default=1.0,
                    min=0.5,
                    max=2.0,
                    step=0.1,
                    display_mode=IO.NumberDisplay.slider,
                    optional=True,
                    tooltip="Speech speed. 1.0 is normal speed.",
                    advanced=True,
                ),
            ],
            outputs=[
                IO.Audio.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"text","text":"Character-based"}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        text: str,
        voice: str = "English_Graceful_Lady",
        model: str = "speech-2.8-hd",
        speed: float = 1.0,
    ) -> IO.NodeOutput:
        validate_string(text, min_length=1)

        # TTS endpoint returns raw audio bytes (MP3) rather than a JSON model.
        response_bytes = await sync_op_raw(
            cls,
            ApiEndpoint(path="/proxy/minimax/v1/t2a_v2", method="POST"),
            data=MinimaxTTSRequest(
                model=model,
                text=text,
                stream=False,
                voice_setting=MinimaxTTSVoiceSetting(
                    voice_id=voice,
                    speed=speed,
                    vol=1.0,
                    pitch=0,
                ),
                audio_setting=MinimaxTTSAudioSetting(
                    sample_rate=32000,
                    bitrate=128000,
                    format="mp3",
                    channel=1,
                ),
            ),
            as_binary=True,
        )
        return IO.NodeOutput(audio_bytes_to_audio_input(response_bytes))


class MinimaxExtension(ComfyExtension):
    """Registers the MiniMax API nodes with ComfyUI."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
            MinimaxTextToVideoNode,
            MinimaxImageToVideoNode,
            # MinimaxSubjectToVideoNode,
            MinimaxHailuoVideoNode,
            MinimaxChatNode,
            MinimaxTextToSpeechNode,
        ]


async def comfy_entrypoint() -> MinimaxExtension:
    """Entry point called by ComfyUI to load this extension."""
    return MinimaxExtension()