From f674c44e5868a291db64a6e875d849a78edbd0dd Mon Sep 17 00:00:00 2001
From: Talmaj Marinc <talmaj@comfy.org>
Date: Wed, 27 May 2026 13:52:59 +0200
Subject: [PATCH] Apply PR comments, reduce verbosity of docstrings, unify
 model and image/frame naming convention.

---
 comfy_extras/nodes_depth_anything_3.py | 95 +++++++++-----------------
 1 file changed, 31 insertions(+), 64 deletions(-)

diff --git a/comfy_extras/nodes_depth_anything_3.py b/comfy_extras/nodes_depth_anything_3.py
index c5cb08c09..cfb023c5b 100644
--- a/comfy_extras/nodes_depth_anything_3.py
+++ b/comfy_extras/nodes_depth_anything_3.py
@@ -1,16 +1,5 @@
 """ComfyUI nodes for Depth Anything 3.
 
-Adds these nodes:
-
-* ``LoadDA3Model`` -- load a DA3 ``.safetensors`` file from the
-  ``models/geometry_estimation/`` folder.
-* ``DA3Inference`` -- unified depth estimation node supporting both mono and
-  multi-view modes via a DynamicCombo selector. Returns a DA3_GEOMETRY dict of
-  raw tensors (depth, sky, confidence, camera). Feed into ``DA3Render``
-  to produce display images, or directly into ``MoGeRender`` for depth / mask views.
-* ``DA3Render`` -- post-processes a DA3_GEOMETRY dict: applies optional
-  sky clipping, normalises depth and confidence, and returns display images.
-
 Model capability matrix
 -----------------------
   Variant               head_type  has_sky  has_conf  cam_dec
@@ -18,10 +7,6 @@ Model capability matrix
   DA3-Base              dualdpt    False    True      yes
   DA3-Mono-Large        dpt        True     False     no
   DA3-Metric-Large      dpt        True     False     no  (raw output is metres)
-
-The node raises a ``ValueError`` at execution time when the selected
-parameters conflict with the loaded model's capabilities (e.g.
-``apply_sky_clip=True`` on a model with no sky head).
 """
 
 from __future__ import annotations
@@ -241,17 +226,6 @@ def _run_da3(model_patcher, image: torch.Tensor, process_res: int,
 
 
 class DA3Inference(io.ComfyNode):
-    """Raw Depth Anything 3 inference node.
-
-    Outputs a DA3_GEOMETRY dict of raw tensors. All display normalization
-    (sky clipping, depth scaling, confidence normalisation) is handled by
-    the companion ``DA3Render`` node.
-
-    Mono mode: each batch element is processed independently.
-    Multi-view mode: all frames share a single forward pass with cross-view
-    attention; adds ``extrinsics`` and ``intrinsics`` to the geometry dict.
-    """
-
     @classmethod
     def define_schema(cls):
         return io.Schema(
@@ -259,12 +233,11 @@ class DA3Inference(io.ComfyNode):
             search_aliases=["depth", "geometry", "da3", "depth anything", "monocular", "pointmap", "sky", "3d", "metric depth", "disparity"],
             display_name="Run Depth Anything 3",
             category="image/geometry_estimation",
-            description="Run Depth Anything 3 on an image or image batch. In multi-view mode each frame is treated as a separate view of the same scene.",
+            description="Run Depth Anything 3 on an image. In multi-view mode each image is treated as a separate view of the same scene.",
             inputs=[
                 DA3ModelType.Input("da3_model"),
                 io.Image.Input("image",
-                               tooltip="Single image or image batch. "
-                                       "In multi-view mode each frame is treated as "
+                               tooltip="In multi-view mode each image is treated as "
                                        "a separate view of the same scene."),
                 io.Int.Input("process_res", default=504, min=140, max=2520, step=14,
                              tooltip="Resolution the model runs at (longest side, multiple of 14). "
@@ -276,8 +249,8 @@ class DA3Inference(io.ComfyNode):
                                tooltip="- upper_bound_resize: scale so the longest side = process_res (caps memory, default).\n"
                                        "- lower_bound_resize: scale so the shortest side = process_res (preserves more detail on tall/wide images, uses more memory)."),
                 io.DynamicCombo.Input("mode",
-                                      tooltip="- mono: single image or independent batch — works with any model variant.\n"
-                                              "- multiview: all frames processed together for geometric consistency + camera pose — requires DA3-Small or DA3-Base (DA3-Mono-Large / DA3-Metric-Large do NOT support this mode).",
+                                      tooltip="- mono: single view image — works with any model variant.\n"
+                                              "- multiview: all images processed together for geometric consistency + camera pose, for Small/Base models only.",
                                       options=[
                     io.DynamicCombo.Option("mono", []),
                     io.DynamicCombo.Option("multiview", [
@@ -292,17 +265,17 @@ class DA3Inference(io.ComfyNode):
                         io.Combo.Input("pose_method",
                                        options=["cam_dec", "ray_pose"],
                                        default="cam_dec",
-                                               tooltip="- cam_dec: small MLP on the final camera token — works on DA3-Small and DA3-Base.\n"
-                                               "- ray_pose: RANSAC over the DualDPT ray output — works on DA3-Small and DA3-Base.\n"
-                                               "Both methods require DA3-Small or DA3-Base; this setting is ignored on Mono/Metric-Large."),
+                                               tooltip="This seeting is ignored for Mono/Metric models."
+                                               "- cam_dec: small MLP on the final camera token.\n"
+                                               "- ray_pose: RANSAC over the DualDPT ray output."),
                     ]),
                 ]),
             ],
             outputs=[
-                DA3Geometry.Output("geometry",
-                                   tooltip="DA3_GEOMETRY dict of raw tensors.\n"
-                                           "- Always: 'depth' (B,H,W), 'image', 'mode'.\n"
-                                           "- Optional: 'sky' + 'mask' (Mono/Metric), 'confidence' raw (Small/Base), 'extrinsics' + 'intrinsics' (multi-view)."),
+                DA3Geometry.Output("da3_geometry",
+                                   tooltip="Dictionary of non-normalized tensors.\n"
+                                           "- Always: 'depth', 'image', 'mode'.\n"
+                                           "- Optional: 'sky' (Mono/Metric), 'confidence' (Small/Base), 'extrinsics' + 'intrinsics' (multi-view)."),
             ],
         )
 
@@ -323,10 +296,10 @@ class DA3Inference(io.ComfyNode):
 
         if not has_cam_dec and not has_dualdpt:
             raise ValueError(
-                "multiview mode requires DA3-Small or DA3-Base — the loaded model "
+                "multiview mode requires Small or Base model. The loaded model "
                 f"(head_type='{diffusion.head_type}') does not support cross-view "
                 "attention or camera pose estimation. Switch mode to 'mono', or "
-                "load DA3-Small / DA3-Base for multiview."
+                "load Small or Base model for multiview."
             )
 
         if pose_method == "cam_dec" and not has_cam_dec:
@@ -424,39 +397,33 @@ class DA3Inference(io.ComfyNode):
 
 
 class DA3Render(io.ComfyNode):
-    """Visualise a DA3_GEOMETRY packet as a single image.
-
-    Mirrors the MoGeRender interface: one ``output`` selector, one IMAGE out.
-    Use multiple nodes in parallel to get depth + sky + confidence simultaneously.
-    """
-
     _DEPTH_RENDER_INPUTS = [
         io.Combo.Input("normalization",
                     options=["v2_style", "min_max", "raw"],
                     default="v2_style",
                     tooltip="- v2_style: mean/std normalisation for perceptually balanced results (default).\n"
                             "- min_max: stretches the full depth range to [0, 1] for maximum contrast.\n"
-                            "- raw: no scaling — preserves metric units for DA3-Metric-Large."),
+                            "- raw: no scaling — preserves metric units for Metric model."),
         io.Boolean.Input("apply_sky_clip", default=False,
                         tooltip="Clip sky-region depth to the 99th percentile of foreground depth before "
-                                "normalisation. Requires a 'sky' tensor in the geometry "
-                                "(DA3-Mono-Large or DA3-Metric-Large); raises an error otherwise."),
+                                "normalisation. Requires a 'sky' tensor in the da3_geometry input"
+                                "provided by Mono/Metric models; raises an error otherwise."),
     ]
 
     @classmethod
     def define_schema(cls):
         return io.Schema(
             node_id="DA3Render",
-            display_name="Depth Anything 3 Render",
+            display_name="Render Depth Anything 3",
             category="image/geometry_estimation",
-            description="Visualise a DA3_GEOMETRY packet. Drop multiple nodes to get different views simultaneously.",
+            description="Render a depth map, confidence map, or sky mask from DA3 geometry data.",
             inputs=[
-                DA3Geometry.Input("geometry"),
+                DA3Geometry.Input("da3_geometry"),
                 io.DynamicCombo.Input("output",
                                       tooltip="- depth: normalised greyscale depth image.\n"
                                               "- depth_colored: depth mapped through the Turbo colormap.\n"
-                                              "- sky_mask: sky probability in [0, 1] (Mono/Metric variants only).\n"
-                                              "- confidence: normalised depth confidence (Small/Base variants only).",
+                                              "- sky_mask: sky probability in [0, 1] (for Mono/Metric models only).\n"
+                                              "- confidence: normalised depth confidence (for Small/Base models only).",
                                       options=[
                     io.DynamicCombo.Option("depth", cls._DEPTH_RENDER_INPUTS),
                     io.DynamicCombo.Option("depth_colored", cls._DEPTH_RENDER_INPUTS),
@@ -476,8 +443,8 @@ class DA3Render(io.ComfyNode):
             apply_sky_clip = output["apply_sky_clip"]
             if apply_sky_clip and "sky" not in geometry:
                 raise ValueError(
-                    "apply_sky_clip=True requires a sky tensor in the geometry, but none is present. "
-                    "Run with DA3-Mono-Large or DA3-Metric-Large, or set apply_sky_clip=False."
+                    "apply_sky_clip=True requires a sky tensor in the da3_geometry input, but none is present. "
+                    "Run with Mono/Metric models or set apply_sky_clip=False."
                 )
             depth = geometry["depth"]
             sky = geometry.get("sky")
@@ -491,13 +458,13 @@ class DA3Render(io.ComfyNode):
 
         elif output_val == "sky_mask":
             if "sky" not in geometry:
-                raise ValueError("geometry has no sky output; run with DA3-Mono-Large or DA3-Metric-Large.")
+                raise ValueError("geometry has no sky output; run with Mono/Metric models.")
             sky = geometry["sky"]
             result = sky.unsqueeze(-1).expand(*sky.shape, 3).contiguous()
 
         elif output_val == "confidence":
             if "confidence" not in geometry:
-                raise ValueError("geometry has no confidence output; run with DA3-Small or DA3-Base.")
+                raise ValueError("geometry has no confidence output; run with Small/Base models.")
             result = _normalize_confidence(geometry["confidence"])
             result = result.unsqueeze(-1).expand(*result.shape, 3).contiguous()
 
@@ -538,14 +505,14 @@ class DA3GeometryToMesh(io.ComfyNode):
         return io.Schema(
             node_id="DA3GeometryToMesh",
             search_aliases=["da3", "depth anything", "mesh", "geometry", "3d", "triangulate"],
-            display_name="DA3 Geometry to Mesh",
+            display_name="Convert DA3 Geometry to Mesh",
             category="image/geometry_estimation",
-            description="Convert a DA3_GEOMETRY depth map into a triangulated 3D mesh (Types.MESH).",
+            description="Convert a depth map into a triangulated 3D mesh.",
             inputs=[
                 DA3Geometry.Input("da3_geometry"),
                 io.Int.Input("batch_index", default=0, min=0, max=4096,
-                             tooltip="Which frame of a batched DA3_GEOMETRY to mesh. "
-                                     "Per-frame vertex counts differ so batches cannot be stacked."),
+                             tooltip="Which image of a batched DA3_GEOMETRY to mesh. "
+                                     "Per-image vertex counts differ so batches cannot be stacked."),
                 io.Int.Input("decimation", default=1, min=1, max=8,
                              tooltip="Vertex stride; 1 = full resolution, 2 = half, etc."),
                 io.Float.Input("discontinuity_threshold", default=0.04, min=0.0, max=1.0, step=0.01,
@@ -640,13 +607,13 @@ class DA3GeometryToPointCloud(io.ComfyNode):
         return io.Schema(
             node_id="DA3GeometryToPointCloud",
             search_aliases=["da3", "depth anything", "point cloud", "pointcloud", "3d", "geometry"],
-            display_name="DA3 Geometry to Point Cloud",
+            display_name="Convert DA3 Geometry to Point Cloud",
             category="image/geometry_estimation",
             description="Unproject a DA3_GEOMETRY depth map into a 3D point cloud (DA3_POINT_CLOUD).",
             inputs=[
                 DA3Geometry.Input("da3_geometry"),
                 io.Int.Input("batch_index", default=0, min=0, max=4096,
-                             tooltip="Which frame of a batched DA3_GEOMETRY to convert."),
+                             tooltip="Which image of a batched DA3_GEOMETRY to convert."),
                 io.Float.Input("confidence_threshold", default=0.1, min=0.0, max=1.0, step=0.01,
                                tooltip="Exclude pixels whose per-image normalised confidence is below this value (0 = keep all). "
                                        "Ignored when the geometry has no confidence map."),