Cleanup Pixal3DConditioning

2026-07-03 21:20:49 +08:00 · 2026-06-30 20:29:13 +03:00 · 2026-06-30 20:29:13 +03:00 · 2333d6bc40
commit 2333d6bc40
parent f4b2173cf2
2 changed files with 59 additions and 26 deletions
--- a/comfy_extras/nodes_moge.py
+++ b/comfy_extras/nodes_moge.py
@ -2,6 +2,7 @@


 import torch
+import math

 import comfy.utils
 import folder_paths
@ -391,6 +392,53 @@ class MoGePointMapToMesh(io.ComfyNode):
        return io.NodeOutput(mesh)


+class MoGeGeometryToFOV(io.ComfyNode):
+    """Extract the horizontal, vertical or diagonal FOV and the focal length in pixels from MoGe intrinsics, e.g. fov_y to feed SAM3DBody_Predict."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="MoGeGeometryToFOV",
+            search_aliases=["moge", "fov", "geometry", "intrinsics", "field of view"],
+            display_name="Get FoV from MoGe Geometry",
+            description="Derive the field of view and focal length from MoGe intrinsics.",
+            category="image/geometry estimation",
+            inputs=[
+                MoGeGeometry.Input("moge_geometry"),
+                io.Combo.Input("axis", options=["vertical", "horizontal", "diagonal"], default="vertical",
+                               tooltip="'vertical' (fov_y), 'horizontal' (fov_x), or 'diagonal'."),
+                io.Combo.Input("unit", options=["degrees", "radians"], default="degrees",
+                               tooltip="Output unit for the FOV."),
+            ],
+            outputs=[
+                io.Float.Output(display_name="fov"),
+                io.Float.Output(display_name="focal_pixels"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, moge_geometry, axis, unit) -> io.NodeOutput:
+        K = moge_geometry.get("intrinsics") if isinstance(moge_geometry, dict) else None
+        if K is None:
+            raise ValueError("moge_geometry has no intrinsics (panorama geometry has none).")
+        if K.ndim == 3:
+            K = K[0]
+        # MoGe normalizes fx by image width and fy by image height; with cx=cy=0.5 the half-extent
+        # in normalized units is 0.5, so tan(fov/2) = 0.5 / f per axis (diagonal: hypot of both).
+        hx = 0.5 / float(K[0, 0].item())
+        hy = 0.5 / float(K[1, 1].item())
+        half_tan = {"horizontal": hx, "vertical": hy, "diagonal": math.hypot(hx, hy)}[axis]
+        fov_radians = 2.0 * math.atan(half_tan)
+        fov = fov_radians if unit == "radians" else math.degrees(fov_radians)
+        # Assuming square pixels, fy*H == fx*W, so fy*H gives the single lens focal length in pixels.
+        src = next((moge_geometry[k] for k in ("image", "points", "depth") if k in moge_geometry), None)
+        if src is None:
+            raise ValueError("moge_geometry has no image/points/depth to read the pixel height from.")
+        H = int(src.shape[1])
+        focal_pixels = float(K[1, 1].item()) * H
+        return io.NodeOutput(fov, focal_pixels)
+
+
 class MoGeMaskOut(io.ComfyNode):
    """Mark masked pixels as invalid in a MoGe geometry. MoGePointMapToMesh's
    finite-check then drops them during triangulation, so the scene mesh has a
@ -466,7 +514,7 @@ class MoGeMaskOut(io.ComfyNode):
 class MoGeExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
-        return [LoadMoGeModel, MoGeInference, MoGePanoramaInference, MoGeRender, MoGePointMapToMesh, MoGeMaskOut]
+        return [LoadMoGeModel, MoGeInference, MoGePanoramaInference, MoGeRender, MoGePointMapToMesh, MoGeGeometryToFOV, MoGeMaskOut]


 async def comfy_entrypoint() -> MoGeExtension:
--- a/comfy_extras/nodes_trellis2.py
+++ b/comfy_extras/nodes_trellis2.py
@ -845,11 +845,6 @@ def _crop_image_with_mask(item_image, item_mask, max_image_size=1024):
    composite = (composite * 255.0).round().clamp(0, 255).to(torch.uint8).float() / 255.0
    return composite, crop_bbox, scene_size

-def _fov_from_moge_intrinsics(moge_intrinsics: torch.Tensor) -> float:
-    fx = moge_intrinsics[..., 0, 0].float()
-    fov = 2.0 * torch.atan(0.5 / fx.clamp(min=1e-4))
-    return float(fov.mean().item())
-
 class Pixal3DConditioning(IO.ComfyNode):

    @classmethod
@ -862,18 +857,11 @@ class Pixal3DConditioning(IO.ComfyNode):
                IO.Image.Input("image"),
                IO.Mask.Input("mask"),
                IO.Float.Input(
-                    "camera_angle_x", default=0.2, min=0.0175, max=2.9671, step=0.001,
-                    tooltip="Horizontal FOV in radians (upstream demo default 0.2). "
-                            "Overridden by moge_geometry if connected.",
-                ),
-                IO.Float.Input(
-                    "mesh_scale", default=1.0, min=0.1, max=4.0, step=0.01,
-                    tooltip="Mesh scale; 1.0 means unit cube.",
-                ),
-                io.Custom("MOGE_GEOMETRY").Input(
-                    "moge_geometry",
-                    optional=True,
-                    tooltip="If connected, camera_angle_x is recovered from MoGe.",
+                    "camera_angle_x", display_name="fov",
+                    default=11.46, min=1.0, max=170.0, step=0.01, advanced=True,
+                    tooltip="Horizontal FOV in degrees (original default ~11.46° = 0.2 rad). "
+                            "Wire a MoGeGeometryToFOV (axis='horizontal', unit='degrees') "
+                            "output here for a MoGe-derived FOV.",
                ),
                NAFModel.Input(
                    "naf_model",
@ -889,8 +877,7 @@ class Pixal3DConditioning(IO.ComfyNode):
        )

    @classmethod
-    def execute(cls, clip_vision_model, image, mask, camera_angle_x, mesh_scale,
-                moge_geometry=None, naf_model=None) -> IO.NodeOutput:
+    def execute(cls, clip_vision_model, image, mask, camera_angle_x, naf_model=None) -> IO.NodeOutput:
        if image.ndim == 3:
            image = image.unsqueeze(0)
        if mask.ndim == 2:
@ -901,9 +888,6 @@ class Pixal3DConditioning(IO.ComfyNode):
        elif mask.shape[0] != batch_size:
            raise ValueError(f"Pixal3DConditioning mask batch {mask.shape[0]} != image batch {batch_size}")

-        if moge_geometry is not None and "intrinsics" in moge_geometry:
-            camera_angle_x = _fov_from_moge_intrinsics(moge_geometry["intrinsics"])
-
        device = comfy.model_management.intermediate_device()

        cond_512_list, cond_1024_list = [], []
@ -954,11 +938,12 @@ class Pixal3DConditioning(IO.ComfyNode):
        hr_tex_1024   = _naf_hr(fm_1024_dino, composite_list, 1024, (1024, 1024))

        # distance_from_fov: grid_point (-1, 0, 0) projects to pixel (0, image_resolution-1).
-        camera_angle_x = float(camera_angle_x)
-        distance = 0.5 / math.tan(camera_angle_x / 2.0) / float(mesh_scale)
+        # The FOV widget is in degrees for UX; math.tan and the downstream projection expect radians.
+        camera_angle_x = math.radians(float(camera_angle_x))
+        distance = 0.5 / math.tan(camera_angle_x / 2.0)
        cam_angle_t = torch.tensor([camera_angle_x] * batch_size, device=device, dtype=torch.float32)
        dist_t = torch.tensor([distance] * batch_size, device=device, dtype=torch.float32)
-        scale_t = torch.tensor([float(mesh_scale)] * batch_size, device=device, dtype=torch.float32)
+        scale_t = torch.ones(batch_size, device=device, dtype=torch.float32)
        T = build_proj_transform_matrix(dist_t, batch_size, device=device, dtype=torch.float32)

        proj_pack = {