Support loading the alpha channel of videos. (#13564)

Not exposed in nodes yet.
2026-05-25 16:37:23 +08:00 · 2026-04-25 18:02:58 -07:00 · 2026-04-25 18:02:58 -07:00 · df22bcd5e1
commit df22bcd5e1
parent 5e3f15a830
2 changed files with 22 additions and 8 deletions
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@ -240,19 +240,34 @@ class VideoFromFile(VideoInput):
            start_time = self.__start_time
        # Get video frames
        frames = []
+        alphas = None
        start_pts = int(start_time / video_stream.time_base)
        end_pts = int((start_time + self.__duration) / video_stream.time_base)
        container.seek(start_pts, stream=video_stream)
+        image_format = 'gbrpf32le'
        for frame in container.decode(video_stream):
+            if alphas is None:
+                for comp in frame.format.components:
+                    if comp.is_alpha:
+                        alphas = []
+                        image_format = 'gbrapf32le'
+                        break
+
            if frame.pts < start_pts:
                continue
            if self.__duration and frame.pts >= end_pts:
                break
-            img = frame.to_ndarray(format='gbrpf32le')  # shape: (H, W, 3)
-            img = torch.from_numpy(img)
-            frames.append(img)

-        images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0)
+            img = frame.to_ndarray(format=image_format)  # shape: (H, W, 4)
+            if alphas is None:
+                frames.append(torch.from_numpy(img))
+            else:
+                frames.append(torch.from_numpy(img[..., :-1]))
+                alphas.append(torch.from_numpy(img[..., -1:]))
+
+        images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3)
+        if alphas is not None:
+            alphas = torch.stack(alphas) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1)

        # Get frame rate
        frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1)
@ -295,7 +310,7 @@ class VideoFromFile(VideoInput):
                })

        metadata = container.metadata
-        return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata)
+        return VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata)

    def get_components(self) -> VideoComponents:
        if isinstance(self.__file, io.BytesIO):
--- a/comfy_api/latest/_util/video_types.py
+++ b/comfy_api/latest/_util/video_types.py
@ -3,7 +3,7 @@ from dataclasses import dataclass
 from enum import Enum
 from fractions import Fraction
 from typing import Optional
-from .._input import ImageInput, AudioInput
+from .._input import ImageInput, AudioInput, MaskInput

 class VideoCodec(str, Enum):
    AUTO = "auto"
@ -48,5 +48,4 @@ class VideoComponents:
    frame_rate: Fraction
    audio: Optional[AudioInput] = None
    metadata: Optional[dict] = None
-
-
+    alpha: Optional[MaskInput] = None