diff --git a/comfy_api/latest/_input/video_types.py b/comfy_api/latest/_input/video_types.py index 451e9526e..8fff52c16 100644 --- a/comfy_api/latest/_input/video_types.py +++ b/comfy_api/latest/_input/video_types.py @@ -65,6 +65,12 @@ class VideoInput(ABC): buffer.seek(0) return buffer + def get_active_trim_window(self) -> tuple[float, float]: + """Return the active trim as ``(start_time, duration)`` in seconds (start_time normalized + to ``>= 0``; ``duration == 0`` means "until the end"). Default: no trim; trimmable subclasses override. + """ + return 0.0, 0.0 + # Provide a default implementation, but subclasses can provide optimized versions # if possible. def get_dimensions(self) -> tuple[int, int]: diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 99e67d363..4a12ff9c1 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -75,6 +75,12 @@ class VideoFromFile(VideoInput): self.__file.seek(0) return self.__file + def get_active_trim_window(self) -> tuple[float, float]: + start_time = self.__start_time + if start_time < 0: + start_time = max(self._get_raw_duration() + start_time, 0.0) + return float(start_time), float(self.__duration) + def get_dimensions(self) -> tuple[int, int]: """ Returns the dimensions of the video input. diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py index 5738df57f..a1b5d599c 100644 --- a/comfy_api_nodes/util/conversions.py +++ b/comfy_api_nodes/util/conversions.py @@ -469,6 +469,11 @@ def _apply_video_scale(video: Input.Video, scale_dims: tuple[int, int]) -> Input input_container = None output_container = None + # get_stream_source() is untrimmed, so apply the trim window in this same pass. + # start_time is normalized (>= 0); duration == 0 means "until the end". + start_time, duration = video.get_active_trim_window() + trimming = bool(start_time or duration) + try: input_source = video.get_stream_source() input_container = av.open(input_source, mode="r") @@ -487,16 +492,45 @@ def _apply_video_scale(video: Input.Video, scale_dims: tuple[int, int]) -> Input audio_stream.layout = stream.layout break + in_video = input_container.streams.video[0] + start_pts = int(start_time / in_video.time_base) if trimming else 0 + end_pts = int((start_time + duration) / in_video.time_base) if duration else None + if start_pts: + input_container.seek(start_pts, stream=in_video) + + encoded = 0 for frame in input_container.decode(video=0): + if trimming: + if frame.pts is None or frame.pts < start_pts: + continue + if end_pts is not None and frame.pts >= end_pts: + break frame = frame.reformat(width=out_w, height=out_h, format="yuv420p") + # Re-wrap as a fresh frame: dropping irregular source timestamps (VFR/AVI/GIF/...) + # lets the encoder assign clean ones and avoids mp4 muxer errors. + frame = av.VideoFrame.from_ndarray(frame.to_ndarray(format="yuv420p"), format="yuv420p") for packet in video_stream.encode(frame): output_container.mux(packet) + encoded += 1 for packet in video_stream.encode(): output_container.mux(packet) + if encoded == 0: + raise ValueError( + f"resize produced no frames (start_time={start_time}, duration={duration} " + "selected nothing from the source)" + ) + if audio_stream is not None: input_container.seek(0) for audio_frame in input_container.decode(audio=0): + if trimming: + if audio_frame.time is None or audio_frame.time < start_time: + continue + if duration and audio_frame.time > start_time + duration: + break + # Carry odd audio time bases the mp4 muxer rejects; reset pts, encoder assigns clean ones (MP3-in-AVI) + audio_frame.pts = None for packet in audio_stream.encode(audio_frame): output_container.mux(packet) for packet in audio_stream.encode():