mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-24 00:39:30 +08:00
Cleanup, add MoGeGeometryToFOV
This commit is contained in:
parent
ebd9c6e620
commit
59dc7ac152
@ -1,6 +1,8 @@
|
||||
"""ComfyUI nodes for the native MoGe (Monocular Geometry Estimation) integration."""
|
||||
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
|
||||
import comfy.utils
|
||||
@ -403,10 +405,57 @@ class MoGePointMapToMesh(io.ComfyNode):
|
||||
return io.NodeOutput(mesh)
|
||||
|
||||
|
||||
class MoGeGeometryToFOV(io.ComfyNode):
    """Extract the field of view and pixel focal length from MoGe intrinsics.

    Supports the vertical (fov_y), horizontal (fov_x) and diagonal FOV, e.g. to
    feed fov_y into SAM3DBody_Predict.
    """

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs (geometry, axis, unit) and its two float outputs."""
        return io.Schema(
            node_id="MoGeGeometryToFOV",
            search_aliases=["moge", "fov", "geometry", "intrinsics", "field of view"],
            display_name="Get FoV from MoGe Geometry",
            description="Derive the field of view and focal length from MoGe intrinsics.",
            category="image/geometry estimation",
            inputs=[
                MoGeGeometry.Input("moge_geometry"),
                io.Combo.Input("axis", options=["vertical", "horizontal", "diagonal"], default="vertical",
                               tooltip="'vertical' (fov_y), 'horizontal' (fov_x), or 'diagonal'."),
                io.Combo.Input("unit", options=["degrees", "radians"], default="degrees",
                               tooltip="Output unit for the FOV."),
            ],
            outputs=[
                io.Float.Output(display_name="fov"),
                io.Float.Output(display_name="focal_pixels"),
            ],
        )

    @classmethod
    def execute(cls, moge_geometry, axis, unit) -> io.NodeOutput:
        """Compute ``(fov, focal_pixels)`` from the geometry's normalized intrinsics.

        Args:
            moge_geometry: Geometry payload; must be a dict carrying "intrinsics"
                and at least one of "image" / "points" / "depth".
            axis: "vertical", "horizontal" or "diagonal".
            unit: "radians" for radians; any other value yields degrees.

        Returns:
            io.NodeOutput holding the FOV in the requested unit and the focal
            length in pixels (normalized fy times the pixel height).

        Raises:
            ValueError: if intrinsics are missing, the focal length is not a
                positive finite number, or no tensor is available to read the
                pixel height from.
            KeyError: if ``axis`` is not one of the three supported names.
        """
        K = moge_geometry.get("intrinsics") if isinstance(moge_geometry, dict) else None
        if K is None:
            raise ValueError("moge_geometry has no intrinsics (panorama geometry has none).")
        if K.ndim == 3:
            # Batched intrinsics: only the first entry is used.
            K = K[0]

        fx = float(K[0, 0].item())
        fy = float(K[1, 1].item())
        # A zero / negative / NaN / inf focal would either divide by zero or yield
        # a meaningless angle, so reject it with an explicit message.
        if not (math.isfinite(fx) and math.isfinite(fy) and fx > 0.0 and fy > 0.0):
            raise ValueError(f"moge_geometry intrinsics have an invalid focal length (fx={fx}, fy={fy}).")

        # MoGe normalizes fx by width and fy by height; with cx=cy=0.5 the half-extent
        # in normalized units is 0.5, so fov = 2*atan(0.5 / f) per axis (hypot for diagonal).
        hx = 0.5 / fx
        hy = 0.5 / fy
        half_tan = {"horizontal": hx, "vertical": hy, "diagonal": math.hypot(hx, hy)}[axis]
        fov_radians = 2.0 * math.atan(half_tan)
        fov = fov_radians if unit == "radians" else math.degrees(fov_radians)

        # Pixels are square here, so fy*H == fx*W is the single lens focal in pixels.
        # Keys that are present but hold None are skipped so a later usable tensor
        # is still found instead of failing on the first empty slot.
        src = next(
            (moge_geometry[k] for k in ("image", "points", "depth") if moge_geometry.get(k) is not None),
            None,
        )
        if src is None:
            raise ValueError("moge_geometry has no image/points/depth to read the pixel height from.")
        # NOTE(review): assumes a batch-first layout where dim 1 is the height — confirm.
        H = int(src.shape[1])
        focal_pixels = fy * H
        return io.NodeOutput(fov, focal_pixels)
|
||||
|
||||
|
||||
class MoGeExtension(ComfyExtension):
    """Extension object that exposes this module's MoGe node classes."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return every MoGe node class to register, including MoGeGeometryToFOV."""
        # A single return: a second, earlier return listing only the first five
        # nodes would shadow this one and leave MoGeGeometryToFOV unregistered.
        return [
            LoadMoGeModel,
            MoGeInference,
            MoGePanoramaInference,
            MoGeRender,
            MoGePointMapToMesh,
            MoGeGeometryToFOV,
        ]
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> MoGeExtension:
|
||||
|
||||
@ -19,7 +19,6 @@ from comfy.ldm.sam3d_body.model.dinov3 import apply_dinov3_qkv_bias_mask
|
||||
from comfy_extras.sam3d_body.utils import (
|
||||
apply_camera_override,
|
||||
cam_int_from_fov,
|
||||
cam_int_from_moge,
|
||||
inputs_from_sam3_track,
|
||||
run_batched_frames,
|
||||
run_batched_single_chunk,
|
||||
@ -42,7 +41,6 @@ SAM3TrackData = io.Custom("SAM3_TRACK_DATA")
|
||||
# KIMODO_POSE_DATA via a MultiType union — those types are mirrored there.
|
||||
MHRPoseData = io.Custom("MHR_POSE_DATA")
|
||||
SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL")
|
||||
MoGeGeometry = io.Custom("MOGE_GEOMETRY")
|
||||
|
||||
# Loader
|
||||
|
||||
@ -153,18 +151,10 @@ class SAM3DBody_Predict(io.ComfyNode):
|
||||
io.Float.Input(
|
||||
"fov_degrees",
|
||||
default=0.0, min=0.0, max=170.0, step=0.5,
|
||||
tooltip=( #TODO: get FoV from moge another way?
|
||||
"Vertical FOV in degrees. Affects predicted depth (cam_t.z) and "
|
||||
"absolute scale. 0 = use moge_geometry or fall back to ~53° (16:9). "
|
||||
"Any non-zero value overrides moge_geometry."
|
||||
),
|
||||
),
|
||||
MoGeGeometry.Input(
|
||||
"moge_geometry",
|
||||
optional=True,
|
||||
tooltip=(
|
||||
"MoGe geometry, used to calculate camera field of view."
|
||||
"For batches choose the most representative frame, or leave unset"
|
||||
"Vertical FOV in degrees. Affects predicted depth (cam_t.z) and "
|
||||
"absolute scale. 0 = fall back to ~53° (16:9). Feed MoGeGeometryToFOV "
|
||||
"here to derive it from a MoGe estimate."
|
||||
),
|
||||
),
|
||||
io.Int.Input(
|
||||
@ -180,7 +170,7 @@ class SAM3DBody_Predict(io.ComfyNode):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, sam3d_body_model, image, sam3_track_data=None, bboxes=None, run_hand_refinement=True, fov_degrees=0.0, moge_geometry=None, chunk_size=64) -> io.NodeOutput:
|
||||
def execute(cls, sam3d_body_model, image, sam3_track_data=None, bboxes=None, run_hand_refinement=True, fov_degrees=0.0, chunk_size=64) -> io.NodeOutput:
|
||||
comfy.model_management.load_model_gpu(sam3d_body_model)
|
||||
inner: SAM3DBody = sam3d_body_model.model
|
||||
|
||||
@ -200,10 +190,8 @@ class SAM3DBody_Predict(io.ComfyNode):
|
||||
per_frame_bboxes = [full_frame_bbox.clone() for _ in range(B)]
|
||||
per_frame_masks = None
|
||||
inference_type = "full" if run_hand_refinement else "body"
|
||||
# Precedence: explicit fov_degrees > MoGe estimate > diagonal default.
|
||||
# fov_degrees > 0 sets intrinsics; else None falls back to prepare_batch's diagonal default.
|
||||
cam_int = cam_int_from_fov(int(H), int(W), float(fov_degrees))
|
||||
if cam_int is None:
|
||||
cam_int = cam_int_from_moge(moge_geometry, int(H), int(W))
|
||||
|
||||
frames_rgb: List[Optional[torch.Tensor]] = []
|
||||
for f in range(B):
|
||||
|
||||
@ -480,14 +480,6 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
),
|
||||
),
|
||||
]),
|
||||
IO.DynamicCombo.Option("sticks", [
|
||||
IO.Combo.Input(
|
||||
"bone_vis_color",
|
||||
options=["white", "rainbow_y"],
|
||||
default="rainbow_y",
|
||||
tooltip="Per-bone vertex colors (see octahedrons).",
|
||||
),
|
||||
]),
|
||||
],
|
||||
tooltip=("Bone vis shape, rigidly skinned to each joint. "),
|
||||
),
|
||||
@ -546,19 +538,11 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
),
|
||||
),
|
||||
]),
|
||||
IO.DynamicCombo.Option("sticks", [
|
||||
IO.Combo.Input(
|
||||
"bone_vis_color",
|
||||
options=["white", "rainbow_y"],
|
||||
default="rainbow_y",
|
||||
tooltip="Per-bone vertex colors (see octahedrons).",
|
||||
),
|
||||
]),
|
||||
],
|
||||
tooltip=(
|
||||
"Bone vis shape, rigidly skinned to each joint. "
|
||||
"'octahedrons' = Blender-style directional bones (joint → "
|
||||
"primary child); 'sticks' = thin lines."
|
||||
"primary child)."
|
||||
),
|
||||
),
|
||||
]),
|
||||
|
||||
@ -8,7 +8,7 @@ Rebuilds an Armature with the MHR 127-bone rig:
|
||||
- facial expression is re-exposed as 72 morph targets driven by `expr_params`
|
||||
so face animation survives plain glTF skinning.
|
||||
|
||||
Optional bone visualization (octahedrons / sticks) is rigidly
|
||||
Optional bone visualization (octahedrons) is rigidly
|
||||
skinned alongside the body mesh — used to preview the armature in glTF
|
||||
viewers that don't draw bones.
|
||||
|
||||
@ -323,7 +323,7 @@ def build_glb_skeletal(
|
||||
skin_idx = len(skins) - 1
|
||||
|
||||
include_body = bool(include_body_mesh)
|
||||
include_bones = bone_vis in ("octahedrons", "sticks")
|
||||
include_bones = bone_vis == "octahedrons"
|
||||
body_mesh_node_idx: Optional[int] = None
|
||||
|
||||
if include_body:
|
||||
@ -386,13 +386,10 @@ def build_glb_skeletal(
|
||||
if include_bones:
|
||||
bone_palette = _bone_colors_rgb(bind_global_m[:, :3], bone_vis_color)
|
||||
|
||||
# Indexes `bone_palette`: octahedrons/sticks use the bone's child
|
||||
# joint so every bone has its own color regardless of skin target.
|
||||
# 'sticks' = thin octahedrons. glTF LINES skinning is unreliable
|
||||
# (Three.js' GLTFLoader doesn't animate skinned line primitives),
|
||||
# so we render triangle tubes instead.
|
||||
# Indexes `bone_palette`: octahedrons use the bone's child joint so
|
||||
# every bone has its own color regardless of skin target.
|
||||
color_idx_per_vert: Optional[np.ndarray] = None
|
||||
hw = float(bone_vis_radius_m) if bone_vis == "octahedrons" else 0.0035
|
||||
hw = float(bone_vis_radius_m)
|
||||
bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh(
|
||||
bind_global_m[:, :3], rig_static["parents"], half_width_m=hw,
|
||||
)
|
||||
|
||||
@ -82,29 +82,6 @@ def cam_int_from_fov(height: int, width: int, fov_degrees: float) -> Optional[to
|
||||
)
|
||||
|
||||
|
||||
def cam_int_from_moge(moge_geometry, height: int, width: int) -> Optional[torch.Tensor]:
    """Build a (1, 3, 3) pixel-space intrinsic matrix from a MoGe geometry payload.

    MoGe's vertical focal is used for both axes and the principal point is
    forced to the image center (overriding MoGe's predicted cx/cy to match
    prepare_batch's convention).

    Args:
        moge_geometry: MOGE_GEOMETRY dict with an optional "intrinsics" entry,
            or None.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        A float32 tensor of shape (1, 3, 3), or None when no usable estimate is
        available: the payload is None / not a dict, has no intrinsics, or its
        vertical focal is not a positive finite number.
    """
    # MOGE_GEOMETRY is a dict with optional keys (see comfy_extras/nodes_moge.py).
    if not isinstance(moge_geometry, dict):
        return None
    K_norm = moge_geometry.get("intrinsics")
    if K_norm is None:
        return None
    if K_norm.ndim == 3:
        # Batched intrinsics: only the first entry is used.
        K_norm = K_norm[0]

    # MoGe stores fy in height-units, so multiplying by H gives the focal in pixels.
    fy_norm = float(K_norm[1, 1].item())
    # The chained comparison is False for NaN, inf, zero and negatives alike; a
    # degenerate focal would produce a singular matrix, so report "no estimate".
    if not (0.0 < fy_norm < float("inf")):
        return None
    focal = fy_norm * height

    return torch.tensor(
        [[[focal, 0.0, width / 2.0],
          [0.0, focal, height / 2.0],
          [0.0, 0.0, 1.0]]],
        dtype=torch.float32,
    )
|
||||
|
||||
|
||||
def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str, Any],
|
||||
H: int, W: int, fov_deg: float = 0.0) -> Dict[str, Any]:
|
||||
"""Re-project every frame's pose through a Load3D 6DOF camera (position/
|
||||
|
||||
Loading…
Reference in New Issue
Block a user