mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-28 09:57:24 +08:00
Refactor to a single socket output for DA3, compatible with MoGeRender.
This commit is contained in:
parent
9e30a0b56c
commit
22982da481
@ -5,10 +5,10 @@ Adds these nodes:
|
|||||||
* ``LoadDepthAnything3`` -- load a DA3 ``.safetensors`` file from the
|
* ``LoadDepthAnything3`` -- load a DA3 ``.safetensors`` file from the
|
||||||
``models/geometry_estimation/`` folder.
|
``models/geometry_estimation/`` folder.
|
||||||
* ``DepthAnything3`` -- unified depth estimation node supporting both mono and
|
* ``DepthAnything3`` -- unified depth estimation node supporting both mono and
|
||||||
multi-view modes via a DynamicCombo selector. In mono mode, returns a
|
multi-view modes via a DynamicCombo selector. Returns a single DA3_GEOMETRY
|
||||||
normalised depth image plus sky/confidence masks. In multi-view mode,
|
dict containing raw depth, normalised depth image, source image, and
|
||||||
additionally returns per-view extrinsics, intrinsics and raw depth packed
|
optionally sky/mask (Mono/Metric), confidence (Small/Base), and
|
||||||
as a LATENT.
|
extrinsics/intrinsics (multi-view). Compatible with MoGe Render.
|
||||||
|
|
||||||
Model capability matrix
|
Model capability matrix
|
||||||
-----------------------
|
-----------------------
|
||||||
@ -31,12 +31,28 @@ import torch
|
|||||||
|
|
||||||
import comfy.model_management as mm
|
import comfy.model_management as mm
|
||||||
import comfy.sd
|
import comfy.sd
|
||||||
from comfy_extras.nodes_moge import MoGeGeometry
|
|
||||||
import folder_paths
|
import folder_paths
|
||||||
from comfy.ldm.depth_anything_3 import preprocess as da3_preprocess
|
from comfy.ldm.depth_anything_3 import preprocess as da3_preprocess
|
||||||
from comfy_api.latest import ComfyExtension, io
|
from comfy_api.latest import ComfyExtension, io
|
||||||
|
|
||||||
DA3ModelType = io.Custom("DA3_MODEL")
|
DA3ModelType = io.Custom("DA3_MODEL")
|
||||||
|
DA3Geometry = io.Custom("DA3_GEOMETRY")
|
||||||
|
|
||||||
|
# DA3_GEOMETRY is a dict with the keys below. Keys marked "always present" are always set; the others are optional (absent when the upstream model didn't produce them):
|
||||||
|
#
|
||||||
|
# Per-frame tensors — B = batch size in mono mode; B = S (number of views) in multi-view mode.
|
||||||
|
# "depth": torch.Tensor (B, H, W) -- raw depth (always present)
|
||||||
|
# "depth_image": torch.Tensor (B, H, W, 3) -- normalised depth for display (always present)
|
||||||
|
# "image": torch.Tensor (B, H, W, 3) -- source image in [0, 1], CPU (always present)
|
||||||
|
# "mode": str -- "mono" or "multiview" (always present)
|
||||||
|
# "sky": torch.Tensor (B, H, W) -- sky probability in [0, 1] (Mono/Metric variants only)
|
||||||
|
# "mask": torch.Tensor (B, H, W) bool -- True = valid foreground / False = sky (present when sky head available)
|
||||||
|
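# "confidence": torch.Tensor (B, H, W) -- depth confidence as stored by the node (Small/Base variants only)
# "confidence_image": torch.Tensor (B, H, W) -- output of _normalize_confidence(), presumably in [0, 1] for display (set whenever "confidence" is)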
|
||||||
|
#
|
||||||
|
# Multi-view only — S = number of views; the leading 1 is the scene dimension from the model.
|
||||||
|
# "extrinsics": torch.Tensor (1, S, 4, 4) -- world-to-camera matrices
|
||||||
|
# "intrinsics": torch.Tensor (1, S, 3, 3) -- pixel-space intrinsics
|
||||||
|
|
||||||
|
|
||||||
class LoadDepthAnything3(io.ComfyNode):
|
class LoadDepthAnything3(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -143,17 +159,19 @@ def _run_da3(model_patcher, image: torch.Tensor, process_res: int,
|
|||||||
class DepthAnything3(io.ComfyNode):
|
class DepthAnything3(io.ComfyNode):
|
||||||
"""Unified Depth Anything 3 node.
|
"""Unified Depth Anything 3 node.
|
||||||
|
|
||||||
|
Returns a single DA3_GEOMETRY dict containing all useful outputs.
|
||||||
|
See the DA3_GEOMETRY comment block near the top of this module for the full key listing.
|
||||||
|
|
||||||
Mono mode
|
Mono mode
|
||||||
---------
|
---------
|
||||||
Runs the model on each batch element independently and returns a
|
Runs the model on each batch element independently.
|
||||||
normalised depth image together with sky and confidence masks.
|
|
||||||
|
|
||||||
Multi-view mode
|
Multi-view mode
|
||||||
---------------
|
---------------
|
||||||
Treats every batch element as a separate view of the same scene.
|
Treats every batch element as a separate view of the same scene.
|
||||||
Runs all views in a single forward pass so cross-view attention can
|
Runs all views in a single forward pass so cross-view attention can
|
||||||
establish geometric consistency. Additionally returns a ``LATENT``
|
establish geometric consistency. Adds ``extrinsics`` and ``intrinsics``
|
||||||
dict with per-view camera extrinsics, intrinsics and raw depth.
|
to the geometry dict.
|
||||||
|
|
||||||
Capability errors
|
Capability errors
|
||||||
-----------------
|
-----------------
|
||||||
@ -161,15 +179,6 @@ class DepthAnything3(io.ComfyNode):
|
|||||||
model feature that is absent in the loaded checkpoint (e.g.
|
model feature that is absent in the loaded checkpoint (e.g.
|
||||||
``apply_sky_clip=True`` on DA3-Small/Base which has no sky head,
|
``apply_sky_clip=True`` on DA3-Small/Base which has no sky head,
|
||||||
or ``pose_method='cam_dec'`` on a monocular model).
|
or ``pose_method='cam_dec'`` on a monocular model).
|
||||||
|
|
||||||
Camera LATENT structure (multi-view only)
|
|
||||||
-----------------------------------------
|
|
||||||
samples: (1, S, 1, H, W) -- raw depth packed as latent samples
|
|
||||||
type: "da3_multiview"
|
|
||||||
extrinsics: (1, S, 4, 4) -- world-to-camera matrices
|
|
||||||
intrinsics: (1, S, 3, 3) -- pixel-space intrinsics
|
|
||||||
depth_raw: (S, H, W) -- un-normalised depth
|
|
||||||
confidence: (S, H, W) -- per-pixel confidence (zeros if N/A)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -244,16 +253,13 @@ class DepthAnything3(io.ComfyNode):
|
|||||||
]),
|
]),
|
||||||
],
|
],
|
||||||
outputs=[
|
outputs=[
|
||||||
io.Image.Output("depth_image"),
|
DA3Geometry.Output("geometry",
|
||||||
io.Mask.Output("sky_mask",
|
tooltip="DA3_GEOMETRY dict. Always contains: "
|
||||||
tooltip="Sky probability mask (Mono/Metric variants). "
|
"'depth' (raw), 'depth_image' (normalised), 'image' (source), 'mode'. "
|
||||||
"Zeros for Small/Base."),
|
"Optional: 'sky' + 'mask' (Mono/Metric variants), "
|
||||||
io.Mask.Output("confidence",
|
"'confidence' (Small/Base variants), "
|
||||||
tooltip="Depth confidence (Small/Base variants). "
|
"'extrinsics' + 'intrinsics' (multi-view only). "
|
||||||
"Zeros for Mono/Metric."),
|
"Compatible with MoGe Render for depth and mask visualisation."),
|
||||||
io.Latent.Output("camera",
|
|
||||||
tooltip="Multi-view: per-view extrinsics + intrinsics + raw depth. "
|
|
||||||
"In mono mode this is an empty placeholder."),
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -340,18 +346,22 @@ class DepthAnything3(io.ComfyNode):
|
|||||||
if apply_sky_clip and sky is not None:
|
if apply_sky_clip and sky is not None:
|
||||||
depth = cls._apply_sky_clip(depth, sky)
|
depth = cls._apply_sky_clip(depth, sky)
|
||||||
|
|
||||||
out_image = cls._depth_to_image(depth, sky, normalization)
|
depth_image = cls._depth_to_image(depth, sky, normalization)
|
||||||
|
|
||||||
sky_mask = sky if sky is not None else torch.zeros_like(depth)
|
geometry: dict = {
|
||||||
conf_mask = (_normalize_confidence(confidence)
|
"depth": depth.contiguous(),
|
||||||
if confidence is not None else torch.zeros_like(depth))
|
"depth_image": depth_image,
|
||||||
camera = {"samples": torch.zeros(1, 1, 1, 1, 1), "type": "mono"}
|
"image": image[..., :3].cpu(),
|
||||||
return io.NodeOutput(
|
"mode": "mono",
|
||||||
out_image,
|
}
|
||||||
sky_mask.contiguous(),
|
if sky is not None:
|
||||||
conf_mask.contiguous(),
|
geometry["sky"] = sky.contiguous()
|
||||||
camera,
|
# True = valid foreground, False = sky/invalid — matches MoGe mask semantics.
|
||||||
)
|
geometry["mask"] = (sky < 0.5).contiguous()
|
||||||
|
if confidence is not None:
|
||||||
|
geometry["confidence"] = confidence.contiguous()
|
||||||
|
geometry["confidence_image"] = _normalize_confidence(confidence).contiguous()
|
||||||
|
return io.NodeOutput(geometry)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _execute_multiview(cls, model, image, process_res, resize_method,
|
def _execute_multiview(cls, model, image, process_res, resize_method,
|
||||||
@ -410,21 +420,22 @@ class DepthAnything3(io.ComfyNode):
|
|||||||
sky_for_norm = sky if diffusion.has_sky else None
|
sky_for_norm = sky if diffusion.has_sky else None
|
||||||
depth_image = cls._depth_to_image(depth, sky_for_norm, normalization)
|
depth_image = cls._depth_to_image(depth, sky_for_norm, normalization)
|
||||||
|
|
||||||
sky_mask = sky if sky is not None else torch.zeros_like(depth)
|
geometry: dict = {
|
||||||
camera_latent = {
|
"depth": depth.contiguous(),
|
||||||
"samples": depth.unsqueeze(0).unsqueeze(2).contiguous(), # (1, S, 1, H, W)
|
"depth_image": depth_image,
|
||||||
"type": "da3_multiview",
|
"image": image[..., :3].cpu(),
|
||||||
|
"mode": "multiview",
|
||||||
"extrinsics": extrinsics.contiguous(),
|
"extrinsics": extrinsics.contiguous(),
|
||||||
"intrinsics": intrinsics.contiguous(),
|
"intrinsics": intrinsics.contiguous(),
|
||||||
"depth_raw": depth.contiguous(),
|
|
||||||
"confidence": conf_raw.contiguous(),
|
|
||||||
}
|
}
|
||||||
return io.NodeOutput(
|
if sky is not None:
|
||||||
depth_image,
|
geometry["sky"] = sky.contiguous()
|
||||||
sky_mask.contiguous(),
|
# True = valid foreground, False = sky/invalid — matches MoGe mask semantics.
|
||||||
conf_mask.contiguous(),
|
geometry["mask"] = (sky < 0.5).contiguous()
|
||||||
camera_latent,
|
if conf_raw.any():
|
||||||
)
|
geometry["confidence"] = conf_mask.contiguous()
|
||||||
|
geometry["confidence_image"] = _normalize_confidence(conf_mask).contiguous()
|
||||||
|
return io.NodeOutput(geometry)
|
||||||
|
|
||||||
|
|
||||||
class DepthAnything3Extension(ComfyExtension):
|
class DepthAnything3Extension(ComfyExtension):
|
||||||
|
|||||||
@ -17,6 +17,9 @@ from tqdm.auto import tqdm
|
|||||||
MoGeModelType = io.Custom("MOGE_MODEL")
|
MoGeModelType = io.Custom("MOGE_MODEL")
|
||||||
MoGeGeometry = io.Custom("MOGE_GEOMETRY")
|
MoGeGeometry = io.Custom("MOGE_GEOMETRY")
|
||||||
|
|
||||||
|
# Redefined (not imported) to avoid a hard dependency on nodes_depth_anything_3;
|
||||||
|
# io.Custom types are matched by string key, so both definitions refer to the same wire type.
|
||||||
|
DA3Geometry = io.Custom("DA3_GEOMETRY")
|
||||||
|
|
||||||
# MOGE_GEOMETRY is a dict with these optional keys (absent when the upstream model didn't produce them):
|
# MOGE_GEOMETRY is a dict with these optional keys (absent when the upstream model didn't produce them):
|
||||||
# "points": torch.Tensor (B, H, W, 3)
|
# "points": torch.Tensor (B, H, W, 3)
|
||||||
@ -285,7 +288,10 @@ class MoGeRender(io.ComfyNode):
|
|||||||
description="Render a depth map or normal map from geometry data",
|
description="Render a depth map or normal map from geometry data",
|
||||||
category="image/geometry_estimation",
|
category="image/geometry_estimation",
|
||||||
inputs=[
|
inputs=[
|
||||||
MoGeGeometry.Input("moge_geometry"),
|
io.MultiType.Input("moge_geometry", types=[MoGeGeometry, DA3Geometry],
|
||||||
|
tooltip="Accepts MOGE_GEOMETRY (from MoGe nodes) or DA3_GEOMETRY (from Depth Anything 3). "
|
||||||
|
"Normal render modes require points or normals and will error if those are absent — "
|
||||||
|
"DA3 produces no point cloud, so only 'depth' and 'mask' outputs are supported."),
|
||||||
io.Combo.Input("output", options=["depth", "depth_colored", "normal_opengl", "normal_directx", "mask"], default="depth",
|
io.Combo.Input("output", options=["depth", "depth_colored", "normal_opengl", "normal_directx", "mask"], default="depth",
|
||||||
tooltip="DirectX vs OpenGL controls the normal-map green-channel convention. DirectX: green = -Y down (Unreal). OpenGL: green = +Y up (Blender, Substance, Unity, glTF)."),
|
tooltip="DirectX vs OpenGL controls the normal-map green-channel convention. DirectX: green = -Y down (Unreal). OpenGL: green = +Y up (Blender, Substance, Unity, glTF)."),
|
||||||
],
|
],
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user