diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 1b4993aa7..5e2e2b1af 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -8,11 +8,14 @@ import av import io import itertools import json +import logging import numpy as np import math import torch from .._util import VideoContainer, VideoCodec, VideoComponents +logger = logging.getLogger(__name__) + def container_to_output_format(container_format: str | None) -> str | None: """ @@ -402,6 +405,16 @@ class VideoFromComponents(VideoInput): metadata: Optional[dict] = None, ): """Save the video to a file path or BytesIO buffer.""" + def mux_packets(container: av.OutputContainer, packets): + if packets is None: + return + if isinstance(packets, (list, tuple)): + for packet in packets: + if packet is not None: + container.mux(packet) + return + container.mux(packets) + if format != VideoContainer.AUTO and format != VideoContainer.MP4: raise ValueError("Only MP4 format is supported for now") if codec != VideoCodec.AUTO and codec != VideoCodec.H264: @@ -433,6 +446,8 @@ class VideoFromComponents(VideoInput): audio_sample_rate = int(self.__components.audio['sample_rate']) waveform = self.__components.audio['waveform'] waveform = waveform[0, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])] + # Guard ffmpeg encoder against invalid upstream audio (NaN/Inf/out-of-range). + waveform = torch.nan_to_num(waveform, nan=0.0, posinf=0.0, neginf=0.0).clamp(-1.0, 1.0) layout = {1: 'mono', 2: 'stereo', 6: '5.1'}.get(waveform.shape[0], 'stereo') audio_stream = output.add_stream('aac', rate=audio_sample_rate, layout=layout) @@ -449,13 +464,22 @@ class VideoFromComponents(VideoInput): output.mux(packet) if audio_stream and self.__components.audio: - frame = av.AudioFrame.from_ndarray(waveform.float().cpu().contiguous().numpy(), format='fltp', layout=layout) - frame.sample_rate = audio_sample_rate - frame.pts = 0 - output.mux(audio_stream.encode(frame)) + try: + audio_np = waveform.float().cpu().contiguous().numpy() + if not np.isfinite(audio_np).all(): + audio_np = np.nan_to_num(audio_np, nan=0.0, posinf=0.0, neginf=0.0) - # Flush encoder - output.mux(audio_stream.encode(None)) + frame = av.AudioFrame.from_ndarray(audio_np, format='fltp', layout=layout) + frame.sample_rate = audio_sample_rate + frame.pts = 0 + mux_packets(output, audio_stream.encode(frame)) + + # Flush encoder + mux_packets(output, audio_stream.encode(None)) + except Exception as exc: + logger.warning( + "Failed to encode audio track, saving video-only output: %s", exc + ) def as_trimmed( self,