This commit is contained in:
AustinMroz 2026-02-02 16:23:46 +03:00 committed by GitHub
commit 3d42682daa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 202 additions and 56 deletions

View File

@ -34,6 +34,21 @@ class VideoInput(ABC):
""" """
pass pass
@abstractmethod
def as_trimmed(
    self,
    start_time: float | None = None,
    duration: float | None = None,
    strict_duration: bool = False,
) -> VideoInput | None:
    """
    Build a trimmed copy of this video.

    The returned object is a new VideoInput restricted to the window that
    begins at ``start_time`` and lasts ``duration``.

    Args:
        start_time: Where the trimmed video begins.
        duration: How long the trimmed video lasts.
        strict_duration: Flag forwarded to the concrete implementation.
            NOTE(review): presumably asks for a None result when the
            requested duration cannot be met - confirm against subclasses.

    Returns:
        A new VideoInput, or None if the result would have negative duration
    """
def get_stream_source(self) -> Union[str, io.BytesIO]: def get_stream_source(self) -> Union[str, io.BytesIO]:
""" """
Get a streamable source for the video. This allows processing without Get a streamable source for the video. This allows processing without

View File

@ -6,6 +6,7 @@ from typing import Optional
from .._input import AudioInput, VideoInput from .._input import AudioInput, VideoInput
import av import av
import io import io
import itertools
import json import json
import numpy as np import numpy as np
import math import math
@ -29,7 +30,6 @@ def container_to_output_format(container_format: str | None) -> str | None:
formats = container_format.split(",") formats = container_format.split(",")
return formats[0] return formats[0]
def get_open_write_kwargs( def get_open_write_kwargs(
dest: str | io.BytesIO, container_format: str, to_format: str | None dest: str | io.BytesIO, container_format: str, to_format: str | None
) -> dict: ) -> dict:
@ -57,12 +57,14 @@ class VideoFromFile(VideoInput):
Class representing video input from a file. Class representing video input from a file.
""" """
def __init__(self, file: str | io.BytesIO): def __init__(self, file: str | io.BytesIO, *, start_time: float=0, duration: float=0):
""" """
Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object
containing the file contents. containing the file contents.
""" """
self.__file = file self.__file = file
self.__start_time = start_time
self.__duration = duration
def get_stream_source(self) -> str | io.BytesIO: def get_stream_source(self) -> str | io.BytesIO:
""" """
@ -96,6 +98,16 @@ class VideoFromFile(VideoInput):
Returns: Returns:
Duration in seconds Duration in seconds
""" """
raw_duration = self._get_raw_duration()
if self.__start_time < 0:
duration_from_start = min(raw_duration, -self.__start_time)
else:
duration_from_start = raw_duration - self.__start_time
if self.__duration:
return min(self.__duration, duration_from_start)
return duration_from_start
def _get_raw_duration(self) -> float:
if isinstance(self.__file, io.BytesIO): if isinstance(self.__file, io.BytesIO):
self.__file.seek(0) self.__file.seek(0)
with av.open(self.__file, mode="r") as container: with av.open(self.__file, mode="r") as container:
@ -113,9 +125,13 @@ class VideoFromFile(VideoInput):
if video_stream and video_stream.average_rate: if video_stream and video_stream.average_rate:
frame_count = 0 frame_count = 0
container.seek(0) container.seek(0)
for packet in container.demux(video_stream): frame_iterator = (
for _ in packet.decode(): container.decode(video_stream)
frame_count += 1 if video_stream.codec.capabilities & 0x100
else container.demux(video_stream)
)
for packet in frame_iterator:
frame_count += 1
if frame_count > 0: if frame_count > 0:
return float(frame_count / video_stream.average_rate) return float(frame_count / video_stream.average_rate)
@ -131,36 +147,54 @@ class VideoFromFile(VideoInput):
with av.open(self.__file, mode="r") as container: with av.open(self.__file, mode="r") as container:
video_stream = self._get_first_video_stream(container) video_stream = self._get_first_video_stream(container)
# 1. Prefer the frames field if available # 1. Prefer the frames field if available and usable
if video_stream.frames and video_stream.frames > 0: if (
video_stream.frames
and video_stream.frames > 0
and not self.__start_time
and not self.__duration
):
return int(video_stream.frames) return int(video_stream.frames)
# 2. Try to estimate from duration and average_rate using only metadata # 2. Try to estimate from duration and average_rate using only metadata
if container.duration is not None and video_stream.average_rate:
duration_seconds = float(container.duration / av.time_base)
estimated_frames = int(round(duration_seconds * float(video_stream.average_rate)))
if estimated_frames > 0:
return estimated_frames
if ( if (
getattr(video_stream, "duration", None) is not None getattr(video_stream, "duration", None) is not None
and getattr(video_stream, "time_base", None) is not None and getattr(video_stream, "time_base", None) is not None
and video_stream.average_rate and video_stream.average_rate
): ):
duration_seconds = float(video_stream.duration * video_stream.time_base) raw_duration = float(video_stream.duration * video_stream.time_base)
if self.__start_time < 0:
duration_from_start = min(raw_duration, -self.__start_time)
else:
duration_from_start = raw_duration - self.__start_time
duration_seconds = min(self.__duration, duration_from_start)
estimated_frames = int(round(duration_seconds * float(video_stream.average_rate))) estimated_frames = int(round(duration_seconds * float(video_stream.average_rate)))
if estimated_frames > 0: if estimated_frames > 0:
return estimated_frames return estimated_frames
# 3. Last resort: decode frames and count them (streaming) # 3. Last resort: decode frames and count them (streaming)
frame_count = 0 if self.__start_time < 0:
container.seek(0) start_time = max(self._get_raw_duration() + self.__start_time, 0)
for packet in container.demux(video_stream): else:
for _ in packet.decode(): start_time = self.__start_time
frame_count += 1 frame_count = 1
start_pts = int(start_time / video_stream.time_base)
if frame_count == 0: end_pts = int((start_time + self.__duration) / video_stream.time_base)
raise ValueError(f"Could not determine frame count for file '{self.__file}'") container.seek(start_pts, stream=video_stream)
frame_iterator = (
container.decode(video_stream)
if video_stream.codec.capabilities & 0x100
else container.demux(video_stream)
)
for frame in frame_iterator:
if frame.pts >= start_pts:
break
else:
raise ValueError(f"Could not determine frame count for file '{self.__file}'\nNo frames exist for start_time {self.__start_time}")
for frame in frame_iterator:
if frame.pts >= end_pts:
break
frame_count += 1
return frame_count return frame_count
def get_frame_rate(self) -> Fraction: def get_frame_rate(self) -> Fraction:
@ -199,9 +233,21 @@ class VideoFromFile(VideoInput):
return container.format.name return container.format.name
def get_components_internal(self, container: InputContainer) -> VideoComponents:
    """
    Decode the trimmed window of an opened container into frames and audio.

    Args:
        container: An already opened input container for this video's file.

    Returns:
        VideoComponents holding the stacked RGB frames scaled to [0, 1], the
        audio of the last audio stream (or None when there is no audio), the
        frame rate of the first video stream and the container metadata.
    """
    video_stream = self._get_first_video_stream(container)
    # A negative start time counts back from the end of the file.
    if self.__start_time < 0:
        start_time = max(self._get_raw_duration() + self.__start_time, 0)
    else:
        start_time = self.__start_time

    # Get video frames
    frames = []
    start_pts = int(start_time / video_stream.time_base)
    end_pts = int((start_time + self.__duration) / video_stream.time_base)
    container.seek(start_pts, stream=video_stream)
    for frame in container.decode(video_stream):
        # Drop the frames decoded between the seek point and the requested start.
        if frame.pts < start_pts:
            continue
        # A duration of 0 means "unlimited", so only cut off when one is set.
        if self.__duration and frame.pts >= end_pts:
            break
        img = frame.to_ndarray(format='rgb24')  # shape: (H, W, 3)
        img = torch.from_numpy(img) / 255.0  # shape: (H, W, 3)
        frames.append(img)

    images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0)

    # Get frame rate
    frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1)

    # Get audio if available
    audio = None
    container.seek(start_pts, stream=video_stream)
    # Use last stream for consistency
    if len(container.streams.audio):
        audio_stream = container.streams.audio[-1]
        audio_chunks = []
        resample = av.audio.resampler.AudioResampler(format='fltp').resample
        # Kept separate from the video `frames` list above; one lazy iterator
        # is shared by both loops below so the second continues after the first.
        audio_iter = itertools.chain.from_iterable(
            map(resample, container.decode(audio_stream))
        )

        # Find the first audio frame that reaches into the requested window.
        has_first_frame = False
        for frame in audio_iter:
            offset_seconds = start_time - frame.pts * audio_stream.time_base
            # Clamp at 0: if the frame begins after start_time the offset is
            # negative, and a negative slice start would keep only the frame's
            # tail instead of the whole frame.
            to_skip = max(int(offset_seconds * audio_stream.sample_rate), 0)
            if to_skip < frame.samples:
                has_first_frame = True
                break
        if has_first_frame:
            audio_chunks.append(frame.to_ndarray()[..., to_skip:])

        for frame in audio_iter:
            # Same "0 means unlimited" rule as the video loop; without the
            # guard an untrimmed video would stop after its first audio frame.
            if self.__duration and frame.time > start_time + self.__duration:
                break
            audio_chunks.append(frame.to_ndarray())  # shape: (channels, samples)

        if len(audio_chunks) > 0:
            audio_data = np.concatenate(audio_chunks, axis=1)  # shape: (channels, total_samples)
            if self.__duration:
                # Trim the overshoot of the last frame to the exact sample count.
                audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)]
            audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
            audio = AudioInput({
                "waveform": audio_tensor,
                "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1,
            })

    metadata = container.metadata
    return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata)
@ -250,7 +309,7 @@ class VideoFromFile(VideoInput):
path: str | io.BytesIO, path: str | io.BytesIO,
format: VideoContainer = VideoContainer.AUTO, format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO, codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None metadata: Optional[dict] = None,
): ):
if isinstance(self.__file, io.BytesIO): if isinstance(self.__file, io.BytesIO):
self.__file.seek(0) # Reset the BytesIO object to the beginning self.__file.seek(0) # Reset the BytesIO object to the beginning
@ -262,15 +321,14 @@ class VideoFromFile(VideoInput):
reuse_streams = False reuse_streams = False
if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None: if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
reuse_streams = False reuse_streams = False
if self.__start_time or self.__duration:
reuse_streams = False
if not reuse_streams: if not reuse_streams:
components = self.get_components_internal(container) components = self.get_components_internal(container)
video = VideoFromComponents(components) video = VideoFromComponents(components)
return video.save_to( return video.save_to(
path, path, format=format, codec=codec, metadata=metadata
format=format,
codec=codec,
metadata=metadata
) )
streams = container.streams streams = container.streams
@ -304,10 +362,21 @@ class VideoFromFile(VideoInput):
output_container.mux(packet) output_container.mux(packet)
def _get_first_video_stream(self, container: InputContainer): def _get_first_video_stream(self, container: InputContainer):
video_stream = next((s for s in container.streams if s.type == "video"), None) if len(container.streams.video):
if video_stream is None: return container.streams.video[0]
raise ValueError(f"No video stream found in file '{self.__file}'") raise ValueError(f"No video stream found in file '{self.__file}'")
return video_stream
def as_trimmed(
    self, start_time: float = 0, duration: float = 0, strict_duration: bool = True
) -> VideoInput | None:
    """
    Create a VideoFromFile over the same source, trimmed relative to this video.

    ``start_time`` and ``duration`` are interpreted relative to the window this
    object already represents, so trimming an already trimmed video can only
    narrow it, never extend it.

    Args:
        start_time: Offset in seconds into this video. A negative value counts
            back from the end of this video. None is treated as 0.
        duration: Requested length in seconds; 0 (or None) means "up to the
            end of this video".
        strict_duration: When True, return None if the trimmed video would be
            shorter than ``duration``.

    Returns:
        The trimmed VideoFromFile, or None if the window is empty or negative,
        or if ``strict_duration`` is set and the duration cannot be met.
    """
    # The abstract signature allows None for both values.
    start_time = start_time or 0
    duration = duration or 0

    if not self.__start_time and not self.__duration:
        # Untrimmed source: the request already is in file coordinates.
        new_start, new_duration = start_time, duration
    else:
        # Resolve the current window to an absolute start and length first.
        window_length = self.get_duration()
        if self.__start_time < 0:
            window_start = max(self._get_raw_duration() + self.__start_time, 0)
        else:
            window_start = self.__start_time

        # Resolve the requested start to an offset inside that window.
        if start_time < 0:
            offset = max(window_length + start_time, 0)
        else:
            offset = start_time

        remaining = window_length - offset
        # An empty window cannot be expressed: duration 0 means "unlimited".
        if remaining <= 0:
            return None

        new_start = window_start + offset
        if duration:
            new_duration = min(duration, remaining)
        elif self.__duration:
            new_duration = remaining
        else:
            # Neither window has an end bound; keep it unlimited.
            new_duration = 0

    trimmed = VideoFromFile(
        self.get_stream_source(),
        start_time=new_start,
        duration=new_duration,
    )
    trimmed_duration = trimmed.get_duration()
    # A start beyond the end of the file yields a negative duration.
    if trimmed_duration < 0:
        return None
    if strict_duration and trimmed_duration < duration:
        return None
    return trimmed
class VideoFromComponents(VideoInput): class VideoFromComponents(VideoInput):
@ -322,7 +391,7 @@ class VideoFromComponents(VideoInput):
return VideoComponents( return VideoComponents(
images=self.__components.images, images=self.__components.images,
audio=self.__components.audio, audio=self.__components.audio,
frame_rate=self.__components.frame_rate frame_rate=self.__components.frame_rate,
) )
def save_to( def save_to(
@ -330,7 +399,7 @@ class VideoFromComponents(VideoInput):
path: str, path: str,
format: VideoContainer = VideoContainer.AUTO, format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO, codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None metadata: Optional[dict] = None,
): ):
if format != VideoContainer.AUTO and format != VideoContainer.MP4: if format != VideoContainer.AUTO and format != VideoContainer.MP4:
raise ValueError("Only MP4 format is supported for now") raise ValueError("Only MP4 format is supported for now")
@ -381,3 +450,14 @@ class VideoFromComponents(VideoInput):
# Flush encoder # Flush encoder
output.mux(audio_stream.encode(None)) output.mux(audio_stream.encode(None))
def as_trimmed(
    self,
    start_time: float | None = None,
    duration: float | None = None,
    strict_duration: bool = True,
) -> VideoInput | None:
    """
    Create a trimmed VideoFromFile from this video's stream source.

    Args:
        start_time: Start in seconds; a negative value counts back from the
            end. None is treated as 0.
        duration: Requested length in seconds; 0 or None means no limit.
        strict_duration: When True, return None if fewer than ``duration``
            seconds are available after ``start_time``.

    Returns:
        A VideoFromFile for the requested window, or None if the start lies
        beyond the end of the video, or if ``strict_duration`` is set and the
        duration cannot be met.
    """
    # The defaults are None; adding them directly would raise TypeError.
    start_time = start_time or 0
    duration = duration or 0

    total = self.get_duration()
    # Seconds of video available from the requested start onwards.
    if start_time < 0:
        available = min(total, -start_time)
    else:
        available = total - start_time

    if available < 0:
        return None
    if strict_duration and available < duration:
        return None
    #TODO Consider tracking duration and trimming at time of save?
    return VideoFromFile(self.get_stream_source(), start_time=start_time, duration=duration)

View File

@ -202,6 +202,56 @@ class LoadVideo(io.ComfyNode):
return True return True
class VideoSlice(io.ComfyNode):
    """Node that cuts a time window out of a video via ``as_trimmed``."""

    @classmethod
    def define_schema(cls):
        """Describe the node: one video input, three trim settings, one video output."""
        node_inputs = [
            io.Video.Input("video"),
            io.Float.Input(
                "start_time",
                default=0.0,
                max=1e5,
                min=-1e5,
                step=0.001,
                tooltip="Start time in seconds",
            ),
            io.Float.Input(
                "duration",
                default=0.0,
                min=0.0,
                step=0.001,
                tooltip="Duration in seconds, or 0 for unlimited duration",
            ),
            io.Boolean.Input(
                "strict_duration",
                default=False,
                tooltip="If True, when the specified duration is not possible, an error will be raised.",
            ),
        ]
        node_outputs = [io.Video.Output()]
        # Alternative search terms under which the node can be found.
        aliases = [
            "trim video duration",
            "skip first frames",
            "frame load cap",
            "start time",
        ]
        return io.Schema(
            node_id="Video Slice",
            display_name="Video Slice",
            search_aliases=aliases,
            category="image/video",
            inputs=node_inputs,
            outputs=node_outputs,
        )

    @classmethod
    def execute(cls, video: io.Video.Type, start_time: float, duration: float, strict_duration: bool) -> io.NodeOutput:
        """
        Trim ``video`` and wrap the result in a node output.

        Raises:
            ValueError: If ``as_trimmed`` returns None for the requested window.
        """
        result = video.as_trimmed(start_time, duration, strict_duration=strict_duration)
        if result is None:
            raise ValueError(
                f"Failed to slice video:\nSource duration: {video.get_duration()}\nStart time: {start_time}\nTarget duration: {duration}"
            )
        return io.NodeOutput(result)
class VideoExtension(ComfyExtension): class VideoExtension(ComfyExtension):
@override @override
@ -212,6 +262,7 @@ class VideoExtension(ComfyExtension):
CreateVideo, CreateVideo,
GetVideoComponents, GetVideoComponents,
LoadVideo, LoadVideo,
VideoSlice,
] ]
async def comfy_entrypoint() -> VideoExtension: async def comfy_entrypoint() -> VideoExtension: