diff --git a/comfy_extras/nodes_moge.py b/comfy_extras/nodes_moge.py index 422949531..c680c1e73 100644 --- a/comfy_extras/nodes_moge.py +++ b/comfy_extras/nodes_moge.py @@ -1,6 +1,8 @@ """ComfyUI nodes for the native MoGe (Monocular Geometry Estimation) integration.""" +import math + import torch import comfy.utils @@ -403,10 +405,57 @@ class MoGePointMapToMesh(io.ComfyNode): return io.NodeOutput(mesh) +class MoGeGeometryToFOV(io.ComfyNode): + """Extract horizontal/vertical FOV from MoGe intrinsics, e.g. fov_y to feed SAM3DBody_Predict.""" + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="MoGeGeometryToFOV", + search_aliases=["moge", "fov", "geometry", "intrinsics", "field of view"], + display_name="Get FoV from MoGe Geometry", + description="Derive the field of view and focal length from MoGe intrinsics.", + category="image/geometry estimation", + inputs=[ + MoGeGeometry.Input("moge_geometry"), + io.Combo.Input("axis", options=["vertical", "horizontal", "diagonal"], default="vertical", + tooltip="'vertical' (fov_y), 'horizontal' (fov_x), or 'diagonal'."), + io.Combo.Input("unit", options=["degrees", "radians"], default="degrees", + tooltip="Output unit for the FOV."), + ], + outputs=[ + io.Float.Output(display_name="fov"), + io.Float.Output(display_name="focal_pixels"), + ], + ) + + @classmethod + def execute(cls, moge_geometry, axis, unit) -> io.NodeOutput: + K = moge_geometry.get("intrinsics") if isinstance(moge_geometry, dict) else None + if K is None: + raise ValueError("moge_geometry has no intrinsics (panorama geometry has none).") + if K.ndim == 3: + K = K[0] + # MoGe normalizes fx by width and fy by height; with cx=cy=0.5 the half-extent + # in normalized units is 0.5, so fov = 2*atan(0.5 / f) per axis (hypot for diagonal). + hx = 0.5 / float(K[0, 0].item()) + hy = 0.5 / float(K[1, 1].item()) + half_tan = {"horizontal": hx, "vertical": hy, "diagonal": math.hypot(hx, hy)}[axis] + fov_radians = 2.0 * math.atan(half_tan) + fov = fov_radians if unit == "radians" else math.degrees(fov_radians) + # Pixels are square here, so fy*H == fx*W is the single lens focal in pixels. + src = next((moge_geometry[k] for k in ("image", "points", "depth") if k in moge_geometry), None) + if src is None: + raise ValueError("moge_geometry has no image/points/depth to read the pixel height from.") + H = int(src.shape[1]) + focal_pixels = float(K[1, 1].item()) * H + return io.NodeOutput(fov, focal_pixels) + + class MoGeExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[io.ComfyNode]]: - return [LoadMoGeModel, MoGeInference, MoGePanoramaInference, MoGeRender, MoGePointMapToMesh] + return [LoadMoGeModel, MoGeInference, MoGePanoramaInference, MoGeRender, MoGePointMapToMesh, MoGeGeometryToFOV] async def comfy_entrypoint() -> MoGeExtension: diff --git a/comfy_extras/nodes_sam3d_body.py b/comfy_extras/nodes_sam3d_body.py index 9b57f7399..c779a2f6e 100644 --- a/comfy_extras/nodes_sam3d_body.py +++ b/comfy_extras/nodes_sam3d_body.py @@ -19,7 +19,6 @@ from comfy.ldm.sam3d_body.model.dinov3 import apply_dinov3_qkv_bias_mask from comfy_extras.sam3d_body.utils import ( apply_camera_override, cam_int_from_fov, - cam_int_from_moge, inputs_from_sam3_track, run_batched_frames, run_batched_single_chunk, @@ -42,7 +41,6 @@ SAM3TrackData = io.Custom("SAM3_TRACK_DATA") # KIMODO_POSE_DATA via a MultiType union — those types are mirrored there. MHRPoseData = io.Custom("MHR_POSE_DATA") SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL") -MoGeGeometry = io.Custom("MOGE_GEOMETRY") # Loader @@ -153,18 +151,10 @@ class SAM3DBody_Predict(io.ComfyNode): io.Float.Input( "fov_degrees", default=0.0, min=0.0, max=170.0, step=0.5, - tooltip=( #TODO: get FoV from moge another way? - "Vertical FOV in degrees. Affects predicted depth (cam_t.z) and " - "absolute scale. 0 = use moge_geometry or fall back to ~53° (16:9). " - "Any non-zero value overrides moge_geometry." - ), - ), - MoGeGeometry.Input( - "moge_geometry", - optional=True, tooltip=( - "MoGe geometry, used to calculate camera field of view." - "For batches choose the most representative frame, or leave unset" + "Vertical FOV in degrees. Affects predicted depth (cam_t.z) and " + "absolute scale. 0 = fall back to ~53° (16:9). Feed MoGeGeometryToFOV " + "here to derive it from a MoGe estimate." ), ), io.Int.Input( @@ -180,7 +170,7 @@ class SAM3DBody_Predict(io.ComfyNode): ) @classmethod - def execute(cls, sam3d_body_model, image, sam3_track_data=None, bboxes=None, run_hand_refinement=True, fov_degrees=0.0, moge_geometry=None, chunk_size=64) -> io.NodeOutput: + def execute(cls, sam3d_body_model, image, sam3_track_data=None, bboxes=None, run_hand_refinement=True, fov_degrees=0.0, chunk_size=64) -> io.NodeOutput: comfy.model_management.load_model_gpu(sam3d_body_model) inner: SAM3DBody = sam3d_body_model.model @@ -200,10 +190,8 @@ class SAM3DBody_Predict(io.ComfyNode): per_frame_bboxes = [full_frame_bbox.clone() for _ in range(B)] per_frame_masks = None inference_type = "full" if run_hand_refinement else "body" - # Precedence: explicit fov_degrees > MoGe estimate > diagonal default. + # fov_degrees > 0 sets intrinsics; else None falls back to prepare_batch's diagonal default. cam_int = cam_int_from_fov(int(H), int(W), float(fov_degrees)) - if cam_int is None: - cam_int = cam_int_from_moge(moge_geometry, int(H), int(W)) frames_rgb: List[Optional[torch.Tensor]] = [] for f in range(B): diff --git a/comfy_extras/nodes_save_3d.py b/comfy_extras/nodes_save_3d.py index 402c049ca..6dba0899c 100644 --- a/comfy_extras/nodes_save_3d.py +++ b/comfy_extras/nodes_save_3d.py @@ -480,14 +480,6 @@ class BuildPoseGLB(IO.ComfyNode): ), ), ]), - IO.DynamicCombo.Option("sticks", [ - IO.Combo.Input( - "bone_vis_color", - options=["white", "rainbow_y"], - default="rainbow_y", - tooltip="Per-bone vertex colors (see octahedrons).", - ), - ]), ], tooltip=("Bone vis shape, rigidly skinned to each joint. "), ), @@ -546,19 +538,11 @@ class BuildPoseGLB(IO.ComfyNode): ), ), ]), - IO.DynamicCombo.Option("sticks", [ - IO.Combo.Input( - "bone_vis_color", - options=["white", "rainbow_y"], - default="rainbow_y", - tooltip="Per-bone vertex colors (see octahedrons).", - ), - ]), ], tooltip=( "Bone vis shape, rigidly skinned to each joint. " "'octahedrons' = Blender-style directional bones (joint → " - "primary child); 'sticks' = thin lines." + "primary child)." ), ), ]), diff --git a/comfy_extras/sam3d_body/export/glb_skeletal.py b/comfy_extras/sam3d_body/export/glb_skeletal.py index aa66b10e9..124ffa80d 100644 --- a/comfy_extras/sam3d_body/export/glb_skeletal.py +++ b/comfy_extras/sam3d_body/export/glb_skeletal.py @@ -8,7 +8,7 @@ Rebuilds an Armature with the MHR 127-bone rig: - facial expression is re-exposed as 72 morph targets driven by `expr_params` so face animation survives plain glTF skinning. -Optional bone visualization (octahedrons / sticks) is rigidly +Optional bone visualization (octahedrons) is rigidly skinned alongside the body mesh — used to preview the armature in glTF viewers that don't draw bones. @@ -323,7 +323,7 @@ def build_glb_skeletal( skin_idx = len(skins) - 1 include_body = bool(include_body_mesh) - include_bones = bone_vis in ("octahedrons", "sticks") + include_bones = bone_vis == "octahedrons" body_mesh_node_idx: Optional[int] = None if include_body: @@ -386,13 +386,10 @@ def build_glb_skeletal( if include_bones: bone_palette = _bone_colors_rgb(bind_global_m[:, :3], bone_vis_color) - # Indexes `bone_palette`: octahedrons/sticks use the bone's child - # joint so every bone has its own color regardless of skin target. - # 'sticks' = thin octahedrons. glTF LINES skinning is unreliable - # (Three.js' GLTFLoader doesn't animate skinned line primitives), - # so we render triangle tubes instead. + # Indexes `bone_palette`: octahedrons use the bone's child joint so + # every bone has its own color regardless of skin target. color_idx_per_vert: Optional[np.ndarray] = None - hw = float(bone_vis_radius_m) if bone_vis == "octahedrons" else 0.0035 + hw = float(bone_vis_radius_m) bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh( bind_global_m[:, :3], rig_static["parents"], half_width_m=hw, ) diff --git a/comfy_extras/sam3d_body/utils.py b/comfy_extras/sam3d_body/utils.py index 6d3405418..576f0d340 100644 --- a/comfy_extras/sam3d_body/utils.py +++ b/comfy_extras/sam3d_body/utils.py @@ -82,29 +82,6 @@ def cam_int_from_fov(height: int, width: int, fov_degrees: float) -> Optional[to ) -def cam_int_from_moge(moge_geometry, height: int, width: int) -> Optional[torch.Tensor]: - """(1,3,3) intrinsic matrix from a MoGe geometry payload. Uses MoGe's - vertical focal for both axes; forces principal point to image center - (overrides MoGe's predicted cx/cy to match prepare_batch's convention).""" - if moge_geometry is None: - return None - # MOGE_GEOMETRY is a dict with optional keys (see comfy_extras/nodes_moge.py). - K_norm = moge_geometry.get("intrinsics") if isinstance(moge_geometry, dict) else None - if K_norm is None: - return None - if K_norm.ndim == 3: - K_norm = K_norm[0] - # MoGe stores fy in height-units (multiply by H to get pixels); vfov = fy. - fy_norm = float(K_norm[1, 1].item()) - focal = fy_norm * height - return torch.tensor( - [[[focal, 0.0, width / 2.0], - [0.0, focal, height / 2.0], - [0.0, 0.0, 1.0]]], - dtype=torch.float32, - ) - - def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str, Any], H: int, W: int, fov_deg: float = 0.0) -> Dict[str, Any]: """Re-project every frame's pose through a Load3D 6DOF camera (position/