Align to Kimodo

This commit is contained in:
kijai 2026-06-16 18:22:08 +03:00
parent ebaf0a71fa
commit d754977178
9 changed files with 296 additions and 147 deletions

View File

@ -34,6 +34,7 @@ from comfy_extras.sam3d_body.utils import image_to_uint8
SAM3TrackData = io.Custom("SAM3_TRACK_DATA") SAM3TrackData = io.Custom("SAM3_TRACK_DATA")
MHRPoseData = io.Custom("MHR_POSE_DATA") # mhr_model_params, shape_params, expr_params, MHR70 keypoint layout, canonical_colors keyed to MHR mesh, hand_vert_mask from MHR LBS). MHRPoseData = io.Custom("MHR_POSE_DATA") # mhr_model_params, shape_params, expr_params, MHR70 keypoint layout, canonical_colors keyed to MHR mesh, hand_vert_mask from MHR LBS).
KimodoPoseData = io.Custom("KIMODO_POSE_DATA") # external Y-up rig (ComfyUI-Kimodo); carries per-frame pred_vertices/pred_cam_t/canonical_colors so the mesh rasterizer is rig-agnostic.
SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL") SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL")
# Loader # Loader
@ -827,10 +828,18 @@ class SAM3DBody_Render(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SAM3DBody_Render", node_id="SAM3DBody_Render",
display_name="Render SAM3D Body", display_name="Render 3D Body Pose",
search_aliases=["Render SAM3D Body", "sam3d render", "kimodo render"],
category="image/detection", category="image/detection",
inputs=[ inputs=[
MHRPoseData.Input("mhr_pose_data"), io.MultiType.Input(
"pose_data", types=[MHRPoseData, KimodoPoseData],
tooltip=(
"MHR pose data, or external Y-up rig pose data (KimodoSample). "
"All render styles work for external rigs that carry OpenPose "
"joint maps in their _skeleton_override (KimodoSample does)."
),
),
io.Image.Input( io.Image.Input(
"background", "background",
optional=True, optional=True,
@ -882,11 +891,11 @@ class SAM3DBody_Render(io.ComfyNode):
@classmethod @classmethod
def execute(cls, mhr_pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput: def execute(cls, pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput:
render_style = render_style or {"render_style": "mesh"} render_style = render_style or {"render_style": "mesh"}
mode_key = render_style.get("render_style", "mesh") mode_key = render_style.get("render_style", "mesh")
native_H, native_W = mhr_pose_data["image_size"] native_H, native_W = pose_data["image_size"]
new_W, new_H = int(width), int(height) new_W, new_H = int(width), int(height)
if new_W == 0 and new_H == 0: if new_W == 0 and new_H == 0:
H, W = native_H, native_W H, W = native_H, native_W
@ -896,14 +905,14 @@ class SAM3DBody_Render(io.ComfyNode):
new_W = max(1, round(native_W * new_H / native_H)) new_W = max(1, round(native_W * new_H / native_H))
elif new_H == 0: elif new_H == 0:
new_H = max(1, round(native_H * new_W / native_W)) new_H = max(1, round(native_H * new_W / native_W))
mhr_pose_data = _scale_pose_data(mhr_pose_data, new_H, new_W) pose_data = _scale_pose_data(pose_data, new_H, new_W)
H, W = new_H, new_W H, W = new_H, new_W
px_scale = min(new_W / native_W, new_H / native_H) px_scale = min(new_W / native_W, new_H / native_H)
if camera_info is not None: if camera_info is not None:
mhr_pose_data = apply_camera_override(mhr_pose_data, camera_info, H, W) pose_data = apply_camera_override(pose_data, camera_info, H, W)
B = len(mhr_pose_data["frames"]) B = len(pose_data["frames"])
if B == 0: if B == 0:
return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32)) return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32))
@ -951,11 +960,11 @@ class SAM3DBody_Render(io.ComfyNode):
region = str(render_style.get("region", "full_body")) region = str(render_style.get("region", "full_body"))
if region == "hands_only": if region == "hands_only":
hand_mask = mhr_pose_data["hand_vert_mask"] hand_mask = pose_data["hand_vert_mask"]
faces_full = np.asarray(mhr_pose_data["faces"]) faces_full = np.asarray(pose_data["faces"])
keep = hand_mask[faces_full].all(axis=1) keep = hand_mask[faces_full].all(axis=1)
mhr_pose_data = dict(mhr_pose_data) pose_data = dict(pose_data)
mhr_pose_data["faces"] = np.ascontiguousarray( pose_data["faces"] = np.ascontiguousarray(
faces_full[keep], dtype=faces_full.dtype, faces_full[keep], dtype=faces_full.dtype,
) )
else: # silhouette — no shader/opacity controls, mask is binary else: # silhouette — no shader/opacity controls, mask is binary
@ -980,7 +989,7 @@ class SAM3DBody_Render(io.ComfyNode):
bg_f = bg_t[min(f, bg_t.shape[0] - 1)] bg_f = bg_t[min(f, bg_t.shape[0] - 1)]
if mode_key == "openpose_2d": if mode_key == "openpose_2d":
img = render_pose_data_openpose( img = render_pose_data_openpose(
mhr_pose_data, frame_idx=f, W=W, H=H, pose_data, frame_idx=f, W=W, H=H,
background=bg_f, background=bg_f,
composite=composite, composite=composite,
marker_radius_px=marker_radius_px, marker_radius_px=marker_radius_px,
@ -993,7 +1002,7 @@ class SAM3DBody_Render(io.ComfyNode):
) )
elif mode_key == "openpose_3d": elif mode_key == "openpose_3d":
img = render_pose_data_capsules( img = render_pose_data_capsules(
mhr_pose_data, frame_idx=f, W=W, H=H, pose_data, frame_idx=f, W=W, H=H,
background=bg_f, background=bg_f,
composite=composite, composite=composite,
radius_m=op3d_radius_m, radius_m=op3d_radius_m,
@ -1005,7 +1014,7 @@ class SAM3DBody_Render(io.ComfyNode):
elif mode_key == "scail": elif mode_key == "scail":
# SCAIL renders body as 3D capsules + 2D openpose hands on top # SCAIL renders body as 3D capsules + 2D openpose hands on top
img = render_pose_data_capsules( img = render_pose_data_capsules(
mhr_pose_data, frame_idx=f, W=W, H=H, pose_data, frame_idx=f, W=W, H=H,
background=bg_f, background=bg_f,
composite=composite, composite=composite,
radius_m=cap_radius_m, radius_m=cap_radius_m,
@ -1017,7 +1026,7 @@ class SAM3DBody_Render(io.ComfyNode):
scail_overlay_px = max(1, int(round(4 * px_scale))) scail_overlay_px = max(1, int(round(4 * px_scale)))
scail_face_px = max(1, int(round(1 * px_scale))) scail_face_px = max(1, int(round(1 * px_scale)))
img = render_pose_data_openpose( img = render_pose_data_openpose(
mhr_pose_data, frame_idx=f, W=W, H=H, pose_data, frame_idx=f, W=W, H=H,
background=img, background=img,
composite="over", composite="over",
include_body=False, include_body=False,
@ -1031,7 +1040,7 @@ class SAM3DBody_Render(io.ComfyNode):
) )
else: else:
img = render_pose_data( img = render_pose_data(
mhr_pose_data, frame_idx=f, W=W, H=H, pose_data, frame_idx=f, W=W, H=H,
background=bg_f, composite=composite, opacity=opacity, background=bg_f, composite=composite, opacity=opacity,
shader_preset=shader_key, shader_preset=shader_key,
rainbow_tilt_x_deg=rainbow_tilt_x, rainbow_tilt_x_deg=rainbow_tilt_x,

View File

@ -684,8 +684,13 @@ class BuildPoseFile(IO.ComfyNode):
fmt = format.get("format", "glb") fmt = format.get("format", "glb")
if fmt == "bvh": if fmt == "bvh":
if sam3d_body_model is None: # External rigs (e.g. Kimodo) supply pose_data["_skeleton_override"]
raise ValueError("Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input.") has_external_rig = isinstance(pose_data, dict) and ("_skeleton_override" in pose_data)
if sam3d_body_model is None and not has_external_rig:
raise ValueError(
"Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input OR a "
"`_skeleton_override` dict in pose_data (e.g. from KimodoSample)."
)
# BVH carries one skeleton; -1 (all tracks) collapses to the first. # BVH carries one skeleton; -1 (all tracks) collapses to the first.
ti = int(track_index) ti = int(track_index)
if ti < 0: if ti < 0:

View File

@ -16,10 +16,9 @@ from typing import Any, Dict, List
import numpy as np import numpy as np
from .glb_shared import ( from .glb_shared import (
bind_skel_state, Rig,
bone_locals_from_globals, bone_locals_from_globals,
collect_tracks, collect_tracks,
extract_rig_static,
global_skel_state_from_pose_data, global_skel_state_from_pose_data,
quat_sign_fix_per_joint, quat_sign_fix_per_joint,
unflip, unflip,
@ -49,9 +48,14 @@ def _quat_to_zxy_euler_deg(quat: np.ndarray) -> np.ndarray:
return out.astype(np.float32) return out.astype(np.float32)
def _find_bvh_root(parents: np.ndarray) -> int: def _find_bvh_root(parents: np.ndarray, is_external: bool = False) -> int:
"""First child of the rig's world anchor so the static origin→body stick """First child of the rig's world anchor so the static origin→body stick
bone gets left out. Falls back to the first root joint.""" bone gets left out. Falls back to the first root joint.
MHR's joint 0 is a static world anchor whose single child is the pelvis, so
skipping it is correct. External rigs (e.g. SOMA-77) whose root is already
the articulated body root with multiple child chains must keep the root —
descending into one child would drop the sibling limbs from the BVH."""
NJ = parents.shape[0] NJ = parents.shape[0]
world_anchors = [j for j in range(NJ) world_anchors = [j for j in range(NJ)
if not (0 <= int(parents[j]) < NJ and int(parents[j]) != j)] if not (0 <= int(parents[j]) < NJ and int(parents[j]) != j)]
@ -64,6 +68,8 @@ def _find_bvh_root(parents: np.ndarray) -> int:
children[p].append(j) children[p].append(j)
wa = world_anchors[0] wa = world_anchors[0]
if children[wa]: if children[wa]:
if is_external and len(children[wa]) > 1:
return wa
return children[wa][0] return children[wa][0]
return wa return wa
@ -80,7 +86,7 @@ def _build_children_map(parents: np.ndarray) -> List[List[int]]:
def build_bvh( def build_bvh(
pose_data: Dict[str, Any], pose_data: Dict[str, Any],
model: Any, model: Any = None,
*, *,
fps: float = 24.0, fps: float = 24.0,
camera_translation: str = "off", camera_translation: str = "off",
@ -89,6 +95,10 @@ def build_bvh(
) -> bytes: ) -> bytes:
"""Build a BVH file from pose_data. Returns UTF-8 encoded text bytes. """Build a BVH file from pose_data. Returns UTF-8 encoded text bytes.
`model` may be None when pose_data carries a `_skeleton_override` (external
rigs, e.g. Kimodo); the rig hierarchy/offsets/bind are read from the
override instead of the MHR model.
`units` is "cm" (default, standard mocap convention) or "m". Affects the `units` is "cm" (default, standard mocap convention) or "m". Affects the
OFFSET and root-position values; rotations are independent of units. OFFSET and root-position values; rotations are independent of units.
""" """
@ -96,9 +106,10 @@ def build_bvh(
raise ValueError(f"build_bvh: units must be 'cm' or 'm', got {units!r}") raise ValueError(f"build_bvh: units must be 'cm' or 'm', got {units!r}")
unit_scale = 100.0 if units == "cm" else 1.0 unit_scale = 100.0 if units == "cm" else 1.0
rig_static = extract_rig_static(model) rig = Rig.from_pose_data(pose_data, model)
NJ = int(rig_static["num_joints"]) is_external = not rig.can_rerun_fk
parents = rig_static["parents"] NJ = rig.num_joints
parents = rig.parents
frames = pose_data["frames"] frames = pose_data["frames"]
tracks = collect_tracks(pose_data, track_index) tracks = collect_tracks(pose_data, track_index)
@ -109,16 +120,16 @@ def build_bvh(
if n_frames == 0: if n_frames == 0:
raise ValueError("build_bvh: track has zero frames") raise ValueError("build_bvh: track has zero frames")
body_root = _find_bvh_root(parents) body_root = _find_bvh_root(parents, is_external)
children_map = _build_children_map(parents) children_map = _build_children_map(parents)
# Bone OFFSETs come from MHR's translation_offsets (joint position # Bone OFFSETs come from MHR's translation_offsets (joint position
# relative to parent in parent's local-bind frame). For the BVH root, # relative to parent in parent's local-bind frame). For the BVH root,
# we use its bind world position so the skeleton sits at the right # we use its bind world position so the skeleton sits at the right
# spot when imported. # spot when imported.
bind_global = bind_skel_state(model) # (NJ, 8) cm bind_global = rig.bind_global_cm # (NJ, 8) cm
bind_pos_m = bind_global[:, :3].astype(np.float64) * 0.01 # (NJ, 3) m bind_pos_m = bind_global[:, :3].astype(np.float64) * 0.01 # (NJ, 3) m
offset_m = rig_static["joint_translation_offsets"].astype(np.float64) * 0.01 offset_m = rig.joint_offsets_cm.astype(np.float64) * 0.01
# DFS order rooted at body_root — matches per-frame channel order. # DFS order rooted at body_root — matches per-frame channel order.
bvh_order: List[int] = [] bvh_order: List[int] = []
@ -133,6 +144,7 @@ def build_bvh(
# treated as the hierarchy root in BVH-space. # treated as the hierarchy root in BVH-space.
rig_global_m = global_skel_state_from_pose_data( rig_global_m = global_skel_state_from_pose_data(
pose_data, frame_indices, person_k, NJ, pose_data, frame_indices, person_k, NJ,
joint_coords_y_down=rig.per_frame_y_down,
) )
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7]) rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7])
bvh_parents = parents.copy() bvh_parents = parents.copy()

View File

@ -18,13 +18,11 @@ import comfy.model_management
from .glb_shared import ( from .glb_shared import (
OPENPOSE_18_PAIRS, OPENPOSE_18_PAIRS,
OPENPOSE18_TO_MHR70,
OPENPOSE_RAINBOW_18, OPENPOSE_RAINBOW_18,
SCAIL_LIMB_COLORS_17, SCAIL_LIMB_COLORS_17,
OPENPOSE_HAND_PAIRS, OPENPOSE_HAND_PAIRS,
OPENPOSE_HAND21_TO_MHR70_R,
OPENPOSE_HAND21_TO_MHR70_L,
OPENPOSE_HAND_COLORS_21, OPENPOSE_HAND_COLORS_21,
openpose_render_keypoints,
) )
@ -37,6 +35,7 @@ def _limb_palette_rgb01(palette: str) -> np.ndarray:
def _build_specs_from_pose( def _build_specs_from_pose(
persons: List[Dict[str, Any]], persons: List[Dict[str, Any]],
pose_data: Dict[str, Any],
*, *,
include_hands: bool, include_hands: bool,
palette: str, palette: str,
@ -61,17 +60,14 @@ def _build_specs_from_pose(
falloff = max(0.0, min(1.0, float(person_brightness_falloff))) falloff = max(0.0, min(1.0, float(person_brightness_falloff)))
for k, person in enumerate(persons): for k, person in enumerate(persons):
kp2d_full = person.get("pred_keypoints_3d")
cam_t = person.get("pred_cam_t") cam_t = person.get("pred_cam_t")
if kp2d_full is None or cam_t is None: body_op = openpose_render_keypoints(person, pose_data, "body", dim=3)
continue if body_op is None or cam_t is None:
kp = np.asarray(kp2d_full, dtype=np.float32)
if kp.ndim != 2 or kp.shape[1] != 3 or kp.shape[0] < 70:
continue continue
cam_t_np = np.asarray(cam_t, dtype=np.float32).reshape(3) cam_t_np = np.asarray(cam_t, dtype=np.float32).reshape(3)
# pred_keypoints_3d is camera frame (Y-down post-flip); add cam_t to # op-keypoints are camera frame (Y-down); add cam_t to place the
# place the subject in front of the camera. # subject in front of the camera.
kp_cam = kp + cam_t_np[None, :] body_kp = body_op + cam_t_np[None, :]
pastel = 0.0 if k == 0 else (1.0 - falloff ** k) pastel = 0.0 if k == 0 else (1.0 - falloff ** k)
@ -83,7 +79,6 @@ def _build_specs_from_pose(
# SCAIL skips face bones (13..16) and redirects limb 12 into a short # SCAIL skips face bones (13..16) and redirects limb 12 into a short
# head stub blending spine + neck→nose direction. # head stub blending spine + neck→nose direction.
body_limb_count = 13 if palette == "scail" else len(OPENPOSE_18_PAIRS) body_limb_count = 13 if palette == "scail" else len(OPENPOSE_18_PAIRS)
body_kp = kp_cam[OPENPOSE18_TO_MHR70]
spine_dir = None spine_dir = None
if palette == "scail": if palette == "scail":
mid_hip = 0.5 * (body_kp[8] + body_kp[11]) # 8=RHip, 11=LHip mid_hip = 0.5 * (body_kp[8] + body_kp[11]) # 8=RHip, 11=LHip
@ -117,10 +112,11 @@ def _build_specs_from_pose(
dtype=np.float32)) dtype=np.float32))
if include_hands: if include_hands:
r_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_R] hand_ops = [openpose_render_keypoints(person, pose_data, p, dim=3)
l_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_L] for p in ("hand_r", "hand_l")]
hand_kps = [h + cam_t_np[None, :] for h in hand_ops if h is not None]
for limb_i, (a, b) in enumerate(OPENPOSE_HAND_PAIRS): for limb_i, (a, b) in enumerate(OPENPOSE_HAND_PAIRS):
for hand_kp in (r_kp, l_kp): for hand_kp in hand_kps:
sa, sb = hand_kp[a], hand_kp[b] sa, sb = hand_kp[a], hand_kp[b]
if not (np.all(np.isfinite(sa)) and np.all(np.isfinite(sb))): if not (np.all(np.isfinite(sa)) and np.all(np.isfinite(sb))):
continue continue
@ -380,7 +376,7 @@ def render_pose_data_capsules(
cx, cy = W * 0.5, H * 0.5 cx, cy = W * 0.5, H * 0.5
starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose( starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose(
persons, include_hands=include_hands, palette=palette, persons, pose_data, include_hands=include_hands, palette=palette,
person_brightness_falloff=person_brightness_falloff, person_brightness_falloff=person_brightness_falloff,
) )

View File

@ -40,6 +40,7 @@ from .glb_shared import (
gaussian_smooth_positions, gaussian_smooth_positions,
make_lit_material, make_lit_material,
quat_sign_fix_per_joint, quat_sign_fix_per_joint,
resolve_openpose_keypoints_from_joints,
rotation_align, rotation_align,
rotmat_to_quat_np, rotmat_to_quat_np,
select_face_landmark_vert_ids, select_face_landmark_vert_ids,
@ -109,7 +110,7 @@ def _openpose_bind_at_rig_rest(
rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3] rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3]
op18_w = override.get("openpose18_joint_weights") op18_w = override.get("openpose18_joint_weights")
parts: List[np.ndarray] = [ parts: List[np.ndarray] = [
_resolve_openpose_keypoints_from_joints( resolve_openpose_keypoints_from_joints(
rest_pos, np.asarray(op18, dtype=np.int64), rest_pos, np.asarray(op18, dtype=np.int64),
weights=None if op18_w is None else np.asarray(op18_w, dtype=np.float32), weights=None if op18_w is None else np.asarray(op18_w, dtype=np.float32),
) )
@ -121,11 +122,11 @@ def _openpose_bind_at_rig_rest(
return None return None
op21_r_w = override.get("openpose_hand21_r_joint_weights") op21_r_w = override.get("openpose_hand21_r_joint_weights")
op21_l_w = override.get("openpose_hand21_l_joint_weights") op21_l_w = override.get("openpose_hand21_l_joint_weights")
parts.append(_resolve_openpose_keypoints_from_joints( parts.append(resolve_openpose_keypoints_from_joints(
rest_pos, np.asarray(op21_r, dtype=np.int64), rest_pos, np.asarray(op21_r, dtype=np.int64),
weights=None if op21_r_w is None else np.asarray(op21_r_w, dtype=np.float32), weights=None if op21_r_w is None else np.asarray(op21_r_w, dtype=np.float32),
)) ))
parts.append(_resolve_openpose_keypoints_from_joints( parts.append(resolve_openpose_keypoints_from_joints(
rest_pos, np.asarray(op21_l, dtype=np.int64), rest_pos, np.asarray(op21_l, dtype=np.int64),
weights=None if op21_l_w is None else np.asarray(op21_l_w, dtype=np.float32), weights=None if op21_l_w is None else np.asarray(op21_l_w, dtype=np.float32),
)) ))
@ -137,39 +138,6 @@ def _openpose_bind_at_rig_rest(
return np.concatenate(parts, axis=0).astype(np.float32) return np.concatenate(parts, axis=0).astype(np.float32)
def _resolve_openpose_keypoints_from_joints(
joints: np.ndarray, mapping: np.ndarray,
weights: Optional[np.ndarray] = None,
) -> np.ndarray:
"""Resolve a `(K, 2)` joint-index → keypoint mapping against a per-frame
`(J, 3)` joint-position array.
Row `(a, b)` with `b == -1` uses `joints[a]` directly (any weight ignored).
Row `(a, b)` with `b >= 0` returns `w * joints[a] + (1 - w) * joints[b]`:
- default (weights=None): `w = 0.5` → plain midpoint, useful for
keypoints that genuinely lie between two joints (Nose ≈ midpoint of
eyes).
- explicit `w` outside [0, 1] EXTRAPOLATES past the line segment, which
is how we approximate landmarks that have no rig joint AND no
in-between joint pair (Ears ≈ RightEye + (RightEye − LeftEye), i.e.
`w_a = 2.0` along the eye→ear axis)."""
a = mapping[:, 0].astype(np.int64)
b = mapping[:, 1].astype(np.int64)
pos_a = joints[a]
has_b = b >= 0
if not has_b.any():
return pos_a.astype(np.float32, copy=False)
b_safe = np.where(has_b, b, a)
pos_b = joints[b_safe]
if weights is None:
w_a = np.where(has_b, 0.5, 1.0).astype(np.float32)
else:
w_a = np.where(has_b, np.asarray(weights, dtype=np.float32), 1.0)
w_b = (1.0 - w_a).astype(np.float32)
out = pos_a * w_a[:, None] + pos_b * w_b[:, None]
return out.astype(np.float32, copy=False)
def _extract_openpose_keypoints( def _extract_openpose_keypoints(
pose_data: Dict[str, Any], frame_indices: List[int], person_k: int, pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
) -> np.ndarray: ) -> np.ndarray:
@ -219,7 +187,7 @@ def _extract_openpose_keypoints(
f"missing at frame={t}, track={person_k}." f"missing at frame={t}, track={person_k}."
) )
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32) joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
out[t_idx] = _resolve_openpose_keypoints_from_joints( out[t_idx] = resolve_openpose_keypoints_from_joints(
joints, op18, weights=op18_w, joints, op18, weights=op18_w,
) )
return out return out
@ -306,10 +274,10 @@ def _extract_openpose_hand_keypoints(
"per-frame `pred_joint_coords` for hands." "per-frame `pred_joint_coords` for hands."
) )
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32) joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
out[t_idx, :21] = _resolve_openpose_keypoints_from_joints( out[t_idx, :21] = resolve_openpose_keypoints_from_joints(
joints, op21_r, weights=op21_r_w, joints, op21_r, weights=op21_r_w,
) )
out[t_idx, 21:] = _resolve_openpose_keypoints_from_joints( out[t_idx, 21:] = resolve_openpose_keypoints_from_joints(
joints, op21_l, weights=op21_l_w, joints, op21_l, weights=op21_l_w,
) )
return out return out

View File

@ -16,6 +16,7 @@ from __future__ import annotations
import json import json
import struct import struct
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
import numpy as np import numpy as np
@ -505,9 +506,15 @@ def extract_rig_static(model: Any, pose_data: Optional[Dict[str, Any]] = None) -
# so we don't need MHR's PCA pose / expression bases. # so we don't need MHR's PCA pose / expression bases.
parents = np.asarray(override["parents"], dtype=np.int32) parents = np.asarray(override["parents"], dtype=np.int32)
rest_v = np.asarray(override["rest_verts_m"], dtype=np.float32) rest_v = np.asarray(override["rest_verts_m"], dtype=np.float32)
# BVH needs parent-relative bone OFFSETs (cm). MHR ships these directly;
# external rigs only give bind globals, so derive locals from them.
bind_global_m = np.asarray(override["bind_global_m"], dtype=np.float32)
local_bind = bone_locals_from_globals(bind_global_m[None], parents)[0]
joint_translation_offsets = (local_bind[:, :3] * 100.0).astype(np.float32)
return { return {
"parents": parents, "parents": parents,
"parents_pmi": parents, "parents_pmi": parents,
"joint_translation_offsets": joint_translation_offsets, # (NJ, 3) cm
"lbs_compact_joints": np.asarray(override["lbs_compact_joints"], dtype=np.uint16), "lbs_compact_joints": np.asarray(override["lbs_compact_joints"], dtype=np.uint16),
"lbs_compact_weights": np.asarray(override["lbs_compact_weights"], dtype=np.float32), "lbs_compact_weights": np.asarray(override["lbs_compact_weights"], dtype=np.float32),
"lbs_compact_max_inf": int(override.get("lbs_compact_max_inf", 4)), "lbs_compact_max_inf": int(override.get("lbs_compact_max_inf", 4)),
@ -737,6 +744,77 @@ def bind_skel_state(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> n
return global_skel_state_per_frame(model, zero_mp)[0] return global_skel_state_per_frame(model, zero_mp)[0]
@dataclass
class Rig:
    """Normalized static rig for the GLB/BVH exporters, independent of where it
    came from: an MHR model (`Rig.from_pose_data(pose_data, model)`) or an inline
    `pose_data["_skeleton_override"]` (external rigs, e.g. ComfyUI-Kimodo).

    Consumers read these fields and never branch on the source. The only
    source-dependent operation is `rest_verts_m` — MHR rest verts depend on the
    subject's `shape_params`; external rigs ship fixed rest verts.
    """

    parents: np.ndarray              # (NJ,) int32, -1 = root
    joint_offsets_cm: np.ndarray     # (NJ, 3) parent-relative bind offsets, cm
    bind_global_cm: np.ndarray       # (NJ, 8) bind global [t cm | q xyzw | s]
    lbs_joints: np.ndarray           # (V, 8) uint16 — compacted skin influences
    lbs_weights: np.ndarray          # (V, 8) f32
    lbs_max_inf: int                 # ≤ 8; lets callers skip JOINTS_1 when ≤ 4
    faces: np.ndarray                # (F, 3) uint32
    num_joints: int
    num_verts: int
    num_expr: int                    # 0 = no face morphs
    per_frame_y_down: bool           # pred_joint_coords stored y-down (MHR) vs y-up (external)
    can_rerun_fk: bool               # True = per-frame FK re-runnable from mhr_model_params
    expr_basis: Optional[np.ndarray] = None      # (E, V, 3) cm — MHR only
    _model: Any = None
    _rest_override: Optional[np.ndarray] = None  # (V, 3) m — external only

    @property
    def bind_global_m(self) -> np.ndarray:
        """Bind globals as float32 with the translation columns scaled cm -> m.

        Always returns a fresh array; `bind_global_cm` is never modified.
        """
        # np.array(...) copies exactly once; the in-place scale below therefore
        # only touches the returned array.
        bind_m = np.array(self.bind_global_cm, dtype=np.float32)
        bind_m[:, :3] *= 0.01
        return bind_m

    def rest_verts_m(self, shape_params: np.ndarray) -> np.ndarray:
        """Zero-pose rest verts (V, 3) in rig-native Y-up metres.

        External rigs ignore `shape_params` and return a copy of their fixed
        rest verts, so callers may edit the result in place without corrupting
        the rig for later calls. MHR rigs delegate to `zero_pose_rest_verts`.
        """
        if self._rest_override is not None:
            return self._rest_override.copy()
        return zero_pose_rest_verts(self._model, shape_params)

    @classmethod
    def from_pose_data(cls, pose_data: Optional[Dict[str, Any]], model: Any = None) -> "Rig":
        """Build a `Rig` from `extract_rig_static(model, pose_data)`.

        The static dict's `_external` flag selects the source: external rigs
        take the pre-compacted skin arrays and read `per_frame_y_down` /
        `rest_verts_m` from the skeleton override; MHR rigs compact their
        sparse skinning to 8 influences here and keep `expr_basis` only when
        `num_expr > 0`.
        """
        rs = extract_rig_static(model, pose_data)
        external = bool(rs.get("_external", False))
        if external:
            joints8 = np.asarray(rs["lbs_compact_joints"], dtype=np.uint16)
            weights8 = np.asarray(rs["lbs_compact_weights"], dtype=np.float32)
            max_inf = int(rs["lbs_compact_max_inf"])
            override = _get_skeleton_override(pose_data) or {}
            # Absent flag means the override stores per-frame joints Y-up.
            per_y_down = bool(override.get("per_frame_y_down", False))
            rest_override = np.asarray(override["rest_verts_m"], dtype=np.float32)
            expr_basis = None
        else:
            joints8, weights8, max_inf = compact_skin_to_n(
                rs["lbs_skin_indices"], rs["lbs_vert_indices"],
                rs["lbs_skin_weights"], int(rs["num_verts"]), max_inf=8,
            )
            per_y_down = True
            rest_override = None
            expr_basis = rs["expr_basis"] if int(rs["num_expr"]) > 0 else None
        return cls(
            parents=np.asarray(rs["parents"], dtype=np.int32),
            joint_offsets_cm=np.asarray(rs["joint_translation_offsets"], dtype=np.float32),
            bind_global_cm=np.asarray(bind_skel_state(model, pose_data), dtype=np.float32),
            lbs_joints=joints8, lbs_weights=weights8, lbs_max_inf=max_inf,
            faces=np.asarray(rs["faces"], dtype=np.uint32),
            num_joints=int(rs["num_joints"]), num_verts=int(rs["num_verts"]),
            num_expr=int(rs["num_expr"]),
            per_frame_y_down=per_y_down, can_rerun_fk=not external,
            expr_basis=expr_basis, _model=model, _rest_override=rest_override,
        )
def ibp_from_bind_global(bind_skel_state_m: np.ndarray) -> np.ndarray: def ibp_from_bind_global(bind_skel_state_m: np.ndarray) -> np.ndarray:
"""Inverse-bind MAT4 by inverting the rig's bind global (meters). Guarantees """Inverse-bind MAT4 by inverting the rig's bind global (meters). Guarantees
IBP[j] = inverse(FK over bind local TRS) — exactly what glTF skinning IBP[j] = inverse(FK over bind local TRS) — exactly what glTF skinning
@ -1069,6 +1147,87 @@ DWPOSE_HAND_COLORS_21 = np.tile(
) )
def resolve_openpose_keypoints_from_joints(
    joints: np.ndarray, mapping: np.ndarray, weights: Optional[np.ndarray] = None,
) -> np.ndarray:
    """(K, 2) joint-index map resolved against (J, D) joint positions -> (K, D).

    Row (a, b): b == -1 uses joints[a]; b >= 0 returns w*joints[a]+(1-w)*joints[b]
    (w defaults 0.5 = midpoint; w outside [0, 1] extrapolates past the segment).

    Args:
        joints: (J, D) joint positions (array-like).
        mapping: (K, 2) integer rows `(a, b)` (array-like); any negative `b`
            marks a direct row.
        weights: optional per-row `w`, broadcastable to (K,). Entries on direct
            rows are ignored.

    Returns:
        (K, D) float32 array. Direct rows are exact copies of `joints[a]`, so
        non-finite values pass through unchanged.
    """
    joints = np.asarray(joints)
    mapping = np.asarray(mapping)
    idx_a = mapping[:, 0].astype(np.int64)
    idx_b = mapping[:, 1].astype(np.int64)
    pos_a = joints[idx_a]
    blend = idx_b >= 0
    if not blend.any():
        return pos_a.astype(np.float32, copy=False)

    if weights is None:
        w_a = np.full(int(blend.sum()), 0.5, dtype=np.float32)
    else:
        w_all = np.broadcast_to(np.asarray(weights, dtype=np.float32), blend.shape)
        w_a = w_all[blend]
    w_b = (1.0 - w_a).astype(np.float32)

    # Only the rows that name a second joint go through the arithmetic. Sending
    # direct rows through `x*1 + x*0` would turn +/-inf into NaN.
    out = pos_a.astype(np.float32)
    out[blend] = pos_a[blend] * w_a[:, None] + joints[idx_b[blend]] * w_b[:, None]
    return out
# part -> (override map key, override weight key, MHR70 reindex map)
# The first two entries are key names looked up in the skeleton override
# (external rigs); the third is the index array applied to the stored MHR
# keypoints when the override carries no map for that part.
_OPENPOSE_RENDER_MAPS = {
    "body": ("openpose18_joint_indices", "openpose18_joint_weights", OPENPOSE18_TO_MHR70),
    "hand_r": ("openpose_hand21_r_joint_indices", "openpose_hand21_r_joint_weights", OPENPOSE_HAND21_TO_MHR70_R),
    "hand_l": ("openpose_hand21_l_joint_indices", "openpose_hand21_l_joint_weights", OPENPOSE_HAND21_TO_MHR70_L),
}
def openpose_render_keypoints(
    person: Dict[str, Any], pose_data: Optional[Dict[str, Any]], part: str,
    *, dim: int, H: int = 0, W: int = 0,
) -> Optional[np.ndarray]:
    """OpenPose keypoints for one person, in op-layout, CAMERA frame (Y-down).

    `part` in {'body','hand_r','hand_l'}. dim=3 -> (K, 3) metres pre-cam_t-add;
    dim=2 -> (K, 2) image pixels. Returns None when the source data is missing
    or malformed.

    External rigs (override carries the joint-index map) resolve from per-frame
    `pred_joint_coords` (rig-native Y-up -> flipped to camera Y-down, matching
    the pred_vertices convention). MHR reindexes the stored
    `pred_keypoints_{3d,2d}` via the MHR70 map.

    `H` / `W` are only read by the external-rig dim=2 projection, where they
    place the principal point at the image centre. An unknown `part` raises
    KeyError from the map lookup.
    """
    map_key, w_key, mhr_map = _OPENPOSE_RENDER_MAPS[part]
    override = _get_skeleton_override(pose_data)
    ext_map = override.get(map_key) if override is not None else None

    if ext_map is not None:
        joints = person.get("pred_joint_coords")
        if joints is None:
            return None
        w = override.get(w_key)
        kp3d = resolve_openpose_keypoints_from_joints(
            np.asarray(joints, dtype=np.float32),
            np.asarray(ext_map, dtype=np.int64),
            None if w is None else np.asarray(w, dtype=np.float32),
        ).copy()
        kp3d[:, 1] *= -1.0  # rig-native Y-up -> camera Y-down
        kp3d[:, 2] *= -1.0
        if dim == 3:
            return kp3d
        cam_t = person.get("pred_cam_t")
        focal = person.get("focal_length")
        if cam_t is None or focal is None:
            return None
        pts3 = kp3d + np.asarray(cam_t, dtype=np.float32).reshape(1, 3)
        # Clamp depth so points at or behind the camera plane do not divide by
        # zero or flip sign.
        z = np.maximum(pts3[:, 2:3], 1e-6)
        f = float(np.asarray(focal, dtype=np.float32).reshape(-1)[0])
        # Pinhole projection written as (f*xy + c*z) / z == f*xy/z + c.
        xy = pts3[:, :2] * f + np.array([W * 0.5, H * 0.5], dtype=np.float32)[None, :] * z
        return (xy / z).astype(np.float32)

    key = "pred_keypoints_3d" if dim == 3 else "pred_keypoints_2d"
    kp_full = person.get(key)
    if kp_full is None:
        return None
    kp_full = np.asarray(kp_full, dtype=np.float32)
    if kp_full.ndim != 2 or kp_full.shape[0] < 70:
        return None
    # Callers add a 3-vector cam_t to the dim=3 result, so anything other than
    # (K, 3) must be reported as missing rather than fail later on broadcast.
    if dim == 3 and kp_full.shape[1] != 3:
        return None
    return kp_full[mhr_map]
# Face landmarks from the MHR rig (option `face_source="rig"`). # Face landmarks from the MHR rig (option `face_source="rig"`).
# MHR has no face bones — face deforms via expr_params morphs — so landmarks # MHR has no face bones — face deforms via expr_params morphs — so landmarks
# are sourced from `pred_vertices` at fixed vertex IDs picked by NN against # are sourced from `pred_vertices` at fixed vertex IDs picked by NN against

View File

@ -24,14 +24,12 @@ import numpy as np
from .glb_shared import ( from .glb_shared import (
GLBWriter, GLBWriter,
Rig,
bake_vertex_colors, bake_vertex_colors,
bind_skel_state,
bone_locals_from_globals, bone_locals_from_globals,
collect_tracks, collect_tracks,
compact_skin_to_n,
compute_normals, compute_normals,
compute_pastel_mix, compute_pastel_mix,
extract_rig_static,
flat_shade_mesh, flat_shade_mesh,
gaussian_smooth_quats, gaussian_smooth_quats,
global_skel_state_from_pose_data, global_skel_state_from_pose_data,
@ -41,7 +39,6 @@ from .glb_shared import (
quat_sign_fix_per_joint, quat_sign_fix_per_joint,
rotation_align, rotation_align,
unflip, unflip,
zero_pose_rest_verts,
) )
from comfy_extras.sam3d_body.utils import jet_colormap from comfy_extras.sam3d_body.utils import jet_colormap
@ -220,30 +217,22 @@ def build_glb_skeletal(
if not tracks: if not tracks:
raise ValueError("build_glb_skeletal: no valid tracks in pose_data") raise ValueError("build_glb_skeletal: no valid tracks in pose_data")
rig_static = extract_rig_static(model, pose_data) rig = Rig.from_pose_data(pose_data, model)
NJ = rig_static["num_joints"] NJ = rig.num_joints
NV = rig_static["num_verts"] NV = rig.num_verts
NEXPR = rig_static["num_expr"] NEXPR = rig.num_expr
parents = rig_static["parents"] parents = rig.parents
is_external = bool(rig_static.get("_external", False)) if not rig.can_rerun_fk:
if is_external:
# External rigs have no PCA pose params to re-run; only stored globals # External rigs have no PCA pose params to re-run; only stored globals
# are available, and kimodo stores joint coords already Y-up. # are available, and they store joint coords already Y-up.
use_stored_global_rots = True use_stored_global_rots = True
joint_coords_y_down = not is_external joint_coords_y_down = rig.per_frame_y_down
# Compact sparse skinning to 8 influences per vertex into glTF's two # Skinning is already compacted to ≤8 influences per vertex (MHR averages
# JOINTS_*/WEIGHTS_* sets. MHR averages ~2.8 influences/vert but some # ~2.8 but some shoulder/hip verts hit 5-8; keeping only 4 there leaks
# shoulder/hip verts have 5-8 where multiple joints cancel — keeping only # per-bone rotation noise into the rendered mesh).
# 4 there leaks per-bone rotation noise into the rendered mesh. joints_8 = rig.lbs_joints
if is_external: weights_8 = rig.lbs_weights
joints_8 = rig_static["lbs_compact_joints"] actual_max_inf = rig.lbs_max_inf
weights_8 = rig_static["lbs_compact_weights"]
actual_max_inf = rig_static["lbs_compact_max_inf"]
else:
joints_8, weights_8, actual_max_inf = compact_skin_to_n(
rig_static["lbs_skin_indices"], rig_static["lbs_vert_indices"],
rig_static["lbs_skin_weights"], NV, max_inf=8,
)
joints_set0 = np.ascontiguousarray(joints_8[:, :4]) joints_set0 = np.ascontiguousarray(joints_8[:, :4])
weights_set0 = np.ascontiguousarray(weights_8[:, :4]) weights_set0 = np.ascontiguousarray(weights_8[:, :4])
use_set1 = actual_max_inf > 4 use_set1 = actual_max_inf > 4
@ -252,10 +241,8 @@ def build_glb_skeletal(
# Derive bone locals from the rig's bind globals rather than recomputing # Derive bone locals from the rig's bind globals rather than recomputing
# FK ourselves, so any mismatch between `parents` and the rig's actual FK # FK ourselves, so any mismatch between `parents` and the rig's actual FK
# is absorbed into the local TRS instead of producing wrong globals. # is absorbed into the local TRS instead of producing wrong globals.
bind_global_cm = bind_skel_state(model, pose_data) bind_global_m = rig.bind_global_m
bind_global_m = bind_global_cm.copy().astype(np.float32) bind_local = bone_locals_from_globals(bind_global_m[None], parents)[0]
bind_global_m[:, :3] *= 0.01
bind_local = bone_locals_from_globals(bind_global_m[None], rig_static["parents"])[0]
# IBP = inverse of bind global. With bone defaults set to bind_local and # IBP = inverse of bind global. With bone defaults set to bind_local and
# FK composed via `parents`, skin_matrix at rest = identity. # FK composed via `parents`, skin_matrix at rest = identity.
@ -280,7 +267,7 @@ def build_glb_skeletal(
expr_morph_accs: List[int] = [] expr_morph_accs: List[int] = []
if include_face_morphs and NEXPR > 0: if include_face_morphs and NEXPR > 0:
eb = rig_static["expr_basis"].astype(np.float32) * 0.01 eb = rig.expr_basis.astype(np.float32) * 0.01
for e in range(NEXPR): for e in range(NEXPR):
expr_morph_accs.append(w.add_vec3_f32_no_minmax(eb[e])) expr_morph_accs.append(w.add_vec3_f32_no_minmax(eb[e]))
@ -329,16 +316,14 @@ def build_glb_skeletal(
body_mesh_node_idx: Optional[int] = None body_mesh_node_idx: Optional[int] = None
if include_body: if include_body:
# External rigs have no PCA shape — `zero_pose_rest_verts` short- # MHR rest verts depend on the subject's shape_params; external rigs
# circuits to `pose_data["_skeleton_override"]["rest_verts_m"]`, # ship fixed rest verts and ignore the arg (so the empty external
# so zeroed shape_params is safe there. # `shape_params` is harmless).
if is_external: shape_params_arr = np.asarray(
shape_params_arr = np.zeros(0, dtype=np.float32) frames[frame_indices[0]][person_k].get("shape_params", []),
else: dtype=np.float32,
shape_params_arr = np.asarray( )
frames[frame_indices[0]][person_k]["shape_params"], dtype=np.float32, rest_v = rig.rest_verts_m(shape_params_arr)
)
rest_v = zero_pose_rest_verts(model, shape_params_arr, pose_data=pose_data)
normals = compute_normals(rest_v, faces_native) normals = compute_normals(rest_v, faces_native)
positions_acc = w.add_vec3_f32(rest_v) positions_acc = w.add_vec3_f32(rest_v)
normals_acc = w.add_vec3_f32(normals) normals_acc = w.add_vec3_f32(normals)
@ -393,7 +378,7 @@ def build_glb_skeletal(
color_idx_per_vert: Optional[np.ndarray] = None color_idx_per_vert: Optional[np.ndarray] = None
hw = float(bone_vis_radius_m) hw = float(bone_vis_radius_m)
bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh( bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh(
bind_global_m[:, :3], rig_static["parents"], half_width_m=hw, bind_global_m[:, :3], parents, half_width_m=hw,
) )
if bv_v.shape[0] > 0: if bv_v.shape[0] > 0:
F = bv_f.shape[0] F = bv_f.shape[0]
@ -458,7 +443,7 @@ def build_glb_skeletal(
# local translation (t_local inherits parent sign via q_parent_inv) # local translation (t_local inherits parent sign via q_parent_inv)
# and produces visible "axis resets" mid-animation. # and produces visible "axis resets" mid-animation.
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7]) rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7])
bone_local_anim = bone_locals_from_globals(rig_global_m, rig_static["parents"]) bone_local_anim = bone_locals_from_globals(rig_global_m, parents)
local_t = bone_local_anim[..., :3].astype(np.float32) local_t = bone_local_anim[..., :3].astype(np.float32)
local_q = bone_local_anim[..., 3:7].astype(np.float32) local_q = bone_local_anim[..., 3:7].astype(np.float32)
local_s = bone_local_anim[..., 7].astype(np.float32) local_s = bone_local_anim[..., 7].astype(np.float32)

View File

@ -19,10 +19,8 @@ from PIL import Image
from comfy_extras.pose.keypoint_draw import KeypointDraw from comfy_extras.pose.keypoint_draw import KeypointDraw
from .glb_shared import ( from .glb_shared import (
OPENPOSE18_TO_MHR70,
OPENPOSE_HAND21_TO_MHR70_L,
OPENPOSE_HAND21_TO_MHR70_R,
OPENPOSE_HAND_COLORS_21, OPENPOSE_HAND_COLORS_21,
openpose_render_keypoints,
select_face_landmark_vert_ids, select_face_landmark_vert_ids,
) )
@ -53,32 +51,31 @@ def _project_face_landmarks_2d(
def _pack_dwpose_134( def _pack_dwpose_134(
person: Dict[str, Any], *, include_body: bool, include_hands: bool, person: Dict[str, Any], pose_data: Dict[str, Any], *,
include_body: bool, include_hands: bool, H: int, W: int,
) -> Tuple[np.ndarray, np.ndarray]: ) -> Tuple[np.ndarray, np.ndarray]:
"""Pack a SAM3D person dict into (kp, scores): (134, 2) DWPose-layout """Pack a SAM3D person dict into (kp, scores): (134, 2) DWPose-layout
coords + (134,) confidence. Face slot (24-91) is left zeroed; face dots coords + (134,) confidence. Face slot (24-91) is left zeroed; face dots
are drawn separately so SAM3D's 238-sapiens / rig-fallback counts work. are drawn separately so SAM3D's 238-sapiens / rig-fallback counts work.
Non-finite or out-of-band entries get score=0 and are filtered downstream.""" Non-finite or out-of-band entries get score=0 and are filtered downstream.
Keypoints come from the shared provider: MHR reindexes `pred_keypoints_2d`,
external rigs (Kimodo) resolve + project from `pred_joint_coords`."""
kp = np.zeros((134, 2), dtype=np.float32) kp = np.zeros((134, 2), dtype=np.float32)
scores = np.zeros(134, dtype=np.float32) scores = np.zeros(134, dtype=np.float32)
kp2d_full = person.get("pred_keypoints_2d")
if kp2d_full is None:
return kp, scores
kp2d = np.asarray(kp2d_full, dtype=np.float32)
if kp2d.ndim != 2 or kp2d.shape[1] != 2 or kp2d.shape[0] < 70:
return kp, scores
if include_body: if include_body:
body_xy = kp2d[OPENPOSE18_TO_MHR70] body_xy = openpose_render_keypoints(person, pose_data, "body", dim=2, H=H, W=W)
finite = np.isfinite(body_xy).all(axis=1) if body_xy is not None:
kp[:18][finite] = body_xy[finite] finite = np.isfinite(body_xy).all(axis=1)
scores[:18][finite] = 1.0 kp[:18][finite] = body_xy[finite]
scores[:18][finite] = 1.0
if include_hands: if include_hands:
for slot_start, mhr_idx in ((92, OPENPOSE_HAND21_TO_MHR70_R), for slot_start, part in ((92, "hand_r"), (113, "hand_l")):
(113, OPENPOSE_HAND21_TO_MHR70_L)): hand_xy = openpose_render_keypoints(person, pose_data, part, dim=2, H=H, W=W)
hand_xy = kp2d[mhr_idx] if hand_xy is None:
continue
finite = np.isfinite(hand_xy).all(axis=1) finite = np.isfinite(hand_xy).all(axis=1)
kp[slot_start:slot_start + 21][finite] = hand_xy[finite] kp[slot_start:slot_start + 21][finite] = hand_xy[finite]
scores[slot_start:slot_start + 21][finite] = 1.0 scores[slot_start:slot_start + 21][finite] = 1.0
@ -190,7 +187,8 @@ def render_pose_data_openpose(
pre = canvas.copy() if pastel > 0 else None pre = canvas.copy() if pastel > 0 else None
kp134, scores134 = _pack_dwpose_134( kp134, scores134 = _pack_dwpose_134(
person, include_body=include_body, include_hands=include_hands, person, pose_data, include_body=include_body,
include_hands=include_hands, H=H, W=W,
) )
_KD.draw_wholebody_keypoints( _KD.draw_wholebody_keypoints(
canvas, kp134, scores=scores134, threshold=0.5, canvas, kp134, scores=scores134, threshold=0.5,

View File

@ -178,6 +178,12 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str,
center = np.array([W * 0.5, H * 0.5], dtype=np.float32) center = np.array([W * 0.5, H * 0.5], dtype=np.float32)
reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"} reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"}
# External rigs (e.g. Kimodo) store pred_joint_coords rig-native Y-up; the
# render openpose/scail keypoint provider resolves from them and flips Y/Z.
# Transform them through the camera too (in camera space, then back to Y-up)
# so those keypoints follow the override instead of staying in the old frame.
override = mhr_pose_data.get("_skeleton_override")
joints_y_up = override is not None and not bool(override.get("per_frame_y_down", False))
new_frames: List[List[Dict[str, Any]]] = [] new_frames: List[List[Dict[str, Any]]] = []
for frame in mhr_pose_data["frames"]: for frame in mhr_pose_data["frames"]:
scaled = [] scaled = []
@ -197,6 +203,17 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str,
if k in reproj: # re-project the new 3D to 2D image coords if k in reproj: # re-project the new 3D to 2D image coords
z = np.maximum(cam[..., 2:3], 1e-6) z = np.maximum(cam[..., 2:3], 1e-6)
p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32) p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32)
jc = p.get("pred_joint_coords")
if jc is not None:
jc = np.asarray(jc, dtype=np.float32).copy()
if joints_y_up:
jc[..., 1] *= -1.0
jc[..., 2] *= -1.0
jc = (jc + cam_t - eye) @ R.T
if joints_y_up:
jc[..., 1] *= -1.0
jc[..., 2] *= -1.0
p["pred_joint_coords"] = jc.astype(np.float32)
p["pred_cam_t"] = np.zeros(3, dtype=np.float32) p["pred_cam_t"] = np.zeros(3, dtype=np.float32)
p["focal_length"] = np.array(new_focal, dtype=np.float32) p["focal_length"] = np.array(new_focal, dtype=np.float32)
scaled.append(p) scaled.append(p)