Align to Kimodo

This commit is contained in:
kijai 2026-06-16 18:22:08 +03:00
parent ebaf0a71fa
commit d754977178
9 changed files with 296 additions and 147 deletions

View File

@ -34,6 +34,7 @@ from comfy_extras.sam3d_body.utils import image_to_uint8
# Custom socket types exchanged between the SAM3D body nodes.
SAM3TrackData = io.Custom("SAM3_TRACK_DATA")
MHRPoseData = io.Custom("MHR_POSE_DATA") # MHR-specific payload (mhr_model_params, shape_params, expr_params, MHR70 keypoint layout, canonical_colors keyed to MHR mesh, hand_vert_mask from MHR LBS).
KimodoPoseData = io.Custom("KIMODO_POSE_DATA") # external Y-up rig (ComfyUI-Kimodo); carries per-frame pred_vertices/pred_cam_t/canonical_colors so the mesh rasterizer is rig-agnostic.
SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL")
# Loader
@ -827,10 +828,18 @@ class SAM3DBody_Render(io.ComfyNode):
def define_schema(cls):
return io.Schema(
node_id="SAM3DBody_Render",
display_name="Render SAM3D Body",
display_name="Render 3D Body Pose",
search_aliases=["Render SAM3D Body", "sam3d render", "kimodo render"],
category="image/detection",
inputs=[
MHRPoseData.Input("mhr_pose_data"),
io.MultiType.Input(
"pose_data", types=[MHRPoseData, KimodoPoseData],
tooltip=(
"MHR pose data, or external Y-up rig pose data (KimodoSample). "
"All render styles work for external rigs that carry OpenPose "
"joint maps in their _skeleton_override (KimodoSample does)."
),
),
io.Image.Input(
"background",
optional=True,
@ -882,11 +891,11 @@ class SAM3DBody_Render(io.ComfyNode):
@classmethod
def execute(cls, mhr_pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput:
def execute(cls, pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput:
render_style = render_style or {"render_style": "mesh"}
mode_key = render_style.get("render_style", "mesh")
native_H, native_W = mhr_pose_data["image_size"]
native_H, native_W = pose_data["image_size"]
new_W, new_H = int(width), int(height)
if new_W == 0 and new_H == 0:
H, W = native_H, native_W
@ -896,14 +905,14 @@ class SAM3DBody_Render(io.ComfyNode):
new_W = max(1, round(native_W * new_H / native_H))
elif new_H == 0:
new_H = max(1, round(native_H * new_W / native_W))
mhr_pose_data = _scale_pose_data(mhr_pose_data, new_H, new_W)
pose_data = _scale_pose_data(pose_data, new_H, new_W)
H, W = new_H, new_W
px_scale = min(new_W / native_W, new_H / native_H)
if camera_info is not None:
mhr_pose_data = apply_camera_override(mhr_pose_data, camera_info, H, W)
pose_data = apply_camera_override(pose_data, camera_info, H, W)
B = len(mhr_pose_data["frames"])
B = len(pose_data["frames"])
if B == 0:
return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32))
@ -951,11 +960,11 @@ class SAM3DBody_Render(io.ComfyNode):
region = str(render_style.get("region", "full_body"))
if region == "hands_only":
hand_mask = mhr_pose_data["hand_vert_mask"]
faces_full = np.asarray(mhr_pose_data["faces"])
hand_mask = pose_data["hand_vert_mask"]
faces_full = np.asarray(pose_data["faces"])
keep = hand_mask[faces_full].all(axis=1)
mhr_pose_data = dict(mhr_pose_data)
mhr_pose_data["faces"] = np.ascontiguousarray(
pose_data = dict(pose_data)
pose_data["faces"] = np.ascontiguousarray(
faces_full[keep], dtype=faces_full.dtype,
)
else: # silhouette — no shader/opacity controls, mask is binary
@ -980,7 +989,7 @@ class SAM3DBody_Render(io.ComfyNode):
bg_f = bg_t[min(f, bg_t.shape[0] - 1)]
if mode_key == "openpose_2d":
img = render_pose_data_openpose(
mhr_pose_data, frame_idx=f, W=W, H=H,
pose_data, frame_idx=f, W=W, H=H,
background=bg_f,
composite=composite,
marker_radius_px=marker_radius_px,
@ -993,7 +1002,7 @@ class SAM3DBody_Render(io.ComfyNode):
)
elif mode_key == "openpose_3d":
img = render_pose_data_capsules(
mhr_pose_data, frame_idx=f, W=W, H=H,
pose_data, frame_idx=f, W=W, H=H,
background=bg_f,
composite=composite,
radius_m=op3d_radius_m,
@ -1005,7 +1014,7 @@ class SAM3DBody_Render(io.ComfyNode):
elif mode_key == "scail":
# SCAIL renders body as 3D capsules + 2D openpose hands on top
img = render_pose_data_capsules(
mhr_pose_data, frame_idx=f, W=W, H=H,
pose_data, frame_idx=f, W=W, H=H,
background=bg_f,
composite=composite,
radius_m=cap_radius_m,
@ -1017,7 +1026,7 @@ class SAM3DBody_Render(io.ComfyNode):
scail_overlay_px = max(1, int(round(4 * px_scale)))
scail_face_px = max(1, int(round(1 * px_scale)))
img = render_pose_data_openpose(
mhr_pose_data, frame_idx=f, W=W, H=H,
pose_data, frame_idx=f, W=W, H=H,
background=img,
composite="over",
include_body=False,
@ -1031,7 +1040,7 @@ class SAM3DBody_Render(io.ComfyNode):
)
else:
img = render_pose_data(
mhr_pose_data, frame_idx=f, W=W, H=H,
pose_data, frame_idx=f, W=W, H=H,
background=bg_f, composite=composite, opacity=opacity,
shader_preset=shader_key,
rainbow_tilt_x_deg=rainbow_tilt_x,

View File

@ -684,8 +684,13 @@ class BuildPoseFile(IO.ComfyNode):
fmt = format.get("format", "glb")
if fmt == "bvh":
if sam3d_body_model is None:
raise ValueError("Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input.")
# External rigs (e.g. Kimodo) supply pose_data["_skeleton_override"]
has_external_rig = isinstance(pose_data, dict) and ("_skeleton_override" in pose_data)
if sam3d_body_model is None and not has_external_rig:
raise ValueError(
"Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input OR a "
"`_skeleton_override` dict in pose_data (e.g. from KimodoSample)."
)
# BVH carries one skeleton; -1 (all tracks) collapses to the first.
ti = int(track_index)
if ti < 0:

View File

@ -16,10 +16,9 @@ from typing import Any, Dict, List
import numpy as np
from .glb_shared import (
bind_skel_state,
Rig,
bone_locals_from_globals,
collect_tracks,
extract_rig_static,
global_skel_state_from_pose_data,
quat_sign_fix_per_joint,
unflip,
@ -49,9 +48,14 @@ def _quat_to_zxy_euler_deg(quat: np.ndarray) -> np.ndarray:
return out.astype(np.float32)
def _find_bvh_root(parents: np.ndarray) -> int:
def _find_bvh_root(parents: np.ndarray, is_external: bool = False) -> int:
"""First child of the rig's world anchor so the static origin→body stick
bone gets left out. Falls back to the first root joint."""
bone gets left out. Falls back to the first root joint.
MHR's joint 0 is a static world anchor whose single child is the pelvis, so
skipping it is correct. External rigs (e.g. SOMA-77) whose root is already
the articulated body root with multiple child chains must keep the root —
descending into one child would drop the sibling limbs from the BVH."""
NJ = parents.shape[0]
world_anchors = [j for j in range(NJ)
if not (0 <= int(parents[j]) < NJ and int(parents[j]) != j)]
@ -64,6 +68,8 @@ def _find_bvh_root(parents: np.ndarray) -> int:
children[p].append(j)
wa = world_anchors[0]
if children[wa]:
if is_external and len(children[wa]) > 1:
return wa
return children[wa][0]
return wa
@ -80,7 +86,7 @@ def _build_children_map(parents: np.ndarray) -> List[List[int]]:
def build_bvh(
pose_data: Dict[str, Any],
model: Any,
model: Any = None,
*,
fps: float = 24.0,
camera_translation: str = "off",
@ -89,6 +95,10 @@ def build_bvh(
) -> bytes:
"""Build a BVH file from pose_data. Returns UTF-8 encoded text bytes.
`model` may be None when pose_data carries a `_skeleton_override` (external
rigs, e.g. Kimodo); the rig hierarchy/offsets/bind are read from the
override instead of the MHR model.
`units` is "cm" (default, standard mocap convention) or "m". Affects the
OFFSET and root-position values; rotations are independent of units.
"""
@ -96,9 +106,10 @@ def build_bvh(
raise ValueError(f"build_bvh: units must be 'cm' or 'm', got {units!r}")
unit_scale = 100.0 if units == "cm" else 1.0
rig_static = extract_rig_static(model)
NJ = int(rig_static["num_joints"])
parents = rig_static["parents"]
rig = Rig.from_pose_data(pose_data, model)
is_external = not rig.can_rerun_fk
NJ = rig.num_joints
parents = rig.parents
frames = pose_data["frames"]
tracks = collect_tracks(pose_data, track_index)
@ -109,16 +120,16 @@ def build_bvh(
if n_frames == 0:
raise ValueError("build_bvh: track has zero frames")
body_root = _find_bvh_root(parents)
body_root = _find_bvh_root(parents, is_external)
children_map = _build_children_map(parents)
# Bone OFFSETs come from MHR's translation_offsets (joint position
# relative to parent in parent's local-bind frame). For the BVH root,
# we use its bind world position so the skeleton sits at the right
# spot when imported.
bind_global = bind_skel_state(model) # (NJ, 8) cm
bind_global = rig.bind_global_cm # (NJ, 8) cm
bind_pos_m = bind_global[:, :3].astype(np.float64) * 0.01 # (NJ, 3) m
offset_m = rig_static["joint_translation_offsets"].astype(np.float64) * 0.01
offset_m = rig.joint_offsets_cm.astype(np.float64) * 0.01
# DFS order rooted at body_root — matches per-frame channel order.
bvh_order: List[int] = []
@ -133,6 +144,7 @@ def build_bvh(
# treated as the hierarchy root in BVH-space.
rig_global_m = global_skel_state_from_pose_data(
pose_data, frame_indices, person_k, NJ,
joint_coords_y_down=rig.per_frame_y_down,
)
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7])
bvh_parents = parents.copy()

View File

@ -18,13 +18,11 @@ import comfy.model_management
from .glb_shared import (
OPENPOSE_18_PAIRS,
OPENPOSE18_TO_MHR70,
OPENPOSE_RAINBOW_18,
SCAIL_LIMB_COLORS_17,
OPENPOSE_HAND_PAIRS,
OPENPOSE_HAND21_TO_MHR70_R,
OPENPOSE_HAND21_TO_MHR70_L,
OPENPOSE_HAND_COLORS_21,
openpose_render_keypoints,
)
@ -37,6 +35,7 @@ def _limb_palette_rgb01(palette: str) -> np.ndarray:
def _build_specs_from_pose(
persons: List[Dict[str, Any]],
pose_data: Dict[str, Any],
*,
include_hands: bool,
palette: str,
@ -61,17 +60,14 @@ def _build_specs_from_pose(
falloff = max(0.0, min(1.0, float(person_brightness_falloff)))
for k, person in enumerate(persons):
kp2d_full = person.get("pred_keypoints_3d")
cam_t = person.get("pred_cam_t")
if kp2d_full is None or cam_t is None:
continue
kp = np.asarray(kp2d_full, dtype=np.float32)
if kp.ndim != 2 or kp.shape[1] != 3 or kp.shape[0] < 70:
body_op = openpose_render_keypoints(person, pose_data, "body", dim=3)
if body_op is None or cam_t is None:
continue
cam_t_np = np.asarray(cam_t, dtype=np.float32).reshape(3)
# pred_keypoints_3d is camera frame (Y-down post-flip); add cam_t to
# place the subject in front of the camera.
kp_cam = kp + cam_t_np[None, :]
# op-keypoints are camera frame (Y-down); add cam_t to place the
# subject in front of the camera.
body_kp = body_op + cam_t_np[None, :]
pastel = 0.0 if k == 0 else (1.0 - falloff ** k)
@ -83,7 +79,6 @@ def _build_specs_from_pose(
# SCAIL skips face bones (13..16) and redirects limb 12 into a short
# head stub blending spine + neck→nose direction.
body_limb_count = 13 if palette == "scail" else len(OPENPOSE_18_PAIRS)
body_kp = kp_cam[OPENPOSE18_TO_MHR70]
spine_dir = None
if palette == "scail":
mid_hip = 0.5 * (body_kp[8] + body_kp[11]) # 8=RHip, 11=LHip
@ -117,10 +112,11 @@ def _build_specs_from_pose(
dtype=np.float32))
if include_hands:
r_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_R]
l_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_L]
hand_ops = [openpose_render_keypoints(person, pose_data, p, dim=3)
for p in ("hand_r", "hand_l")]
hand_kps = [h + cam_t_np[None, :] for h in hand_ops if h is not None]
for limb_i, (a, b) in enumerate(OPENPOSE_HAND_PAIRS):
for hand_kp in (r_kp, l_kp):
for hand_kp in hand_kps:
sa, sb = hand_kp[a], hand_kp[b]
if not (np.all(np.isfinite(sa)) and np.all(np.isfinite(sb))):
continue
@ -380,7 +376,7 @@ def render_pose_data_capsules(
cx, cy = W * 0.5, H * 0.5
starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose(
persons, include_hands=include_hands, palette=palette,
persons, pose_data, include_hands=include_hands, palette=palette,
person_brightness_falloff=person_brightness_falloff,
)

View File

@ -40,6 +40,7 @@ from .glb_shared import (
gaussian_smooth_positions,
make_lit_material,
quat_sign_fix_per_joint,
resolve_openpose_keypoints_from_joints,
rotation_align,
rotmat_to_quat_np,
select_face_landmark_vert_ids,
@ -109,7 +110,7 @@ def _openpose_bind_at_rig_rest(
rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3]
op18_w = override.get("openpose18_joint_weights")
parts: List[np.ndarray] = [
_resolve_openpose_keypoints_from_joints(
resolve_openpose_keypoints_from_joints(
rest_pos, np.asarray(op18, dtype=np.int64),
weights=None if op18_w is None else np.asarray(op18_w, dtype=np.float32),
)
@ -121,11 +122,11 @@ def _openpose_bind_at_rig_rest(
return None
op21_r_w = override.get("openpose_hand21_r_joint_weights")
op21_l_w = override.get("openpose_hand21_l_joint_weights")
parts.append(_resolve_openpose_keypoints_from_joints(
parts.append(resolve_openpose_keypoints_from_joints(
rest_pos, np.asarray(op21_r, dtype=np.int64),
weights=None if op21_r_w is None else np.asarray(op21_r_w, dtype=np.float32),
))
parts.append(_resolve_openpose_keypoints_from_joints(
parts.append(resolve_openpose_keypoints_from_joints(
rest_pos, np.asarray(op21_l, dtype=np.int64),
weights=None if op21_l_w is None else np.asarray(op21_l_w, dtype=np.float32),
))
@ -137,39 +138,6 @@ def _openpose_bind_at_rig_rest(
return np.concatenate(parts, axis=0).astype(np.float32)
def _resolve_openpose_keypoints_from_joints(
    joints: np.ndarray, mapping: np.ndarray,
    weights: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Turn a `(K, 2)` joint-index table into `K` keypoint positions taken
    from a per-frame joint-position array.

    Each row `(a, b)` of `mapping` is resolved as follows:
      - `b == -1`: the keypoint is `joints[a]`; any weight for that row is
        ignored.
      - `b >= 0`: the keypoint is `w * joints[a] + (1 - w) * joints[b]`.
        With `weights=None`, `w` is 0.5 (plain midpoint, e.g. a nose placed
        halfway between the eyes). An explicit `w` outside [0, 1]
        extrapolates past the segment, which approximates landmarks with no
        rig joint of their own (e.g. an ear placed beyond one eye along the
        eye-to-eye axis with `w = 2.0`).

    Returns a float32 array with one row per mapping row.
    """
    idx_a = mapping[:, 0].astype(np.int64)
    idx_b = mapping[:, 1].astype(np.int64)
    paired = idx_b >= 0
    primary = joints[idx_a]

    # Pure lookup table: nothing to blend.
    if not paired.any():
        return primary.astype(np.float32, copy=False)

    # Unpaired rows point their "second" joint back at the first so the
    # fancy-index below never sees -1.
    secondary = joints[np.where(paired, idx_b, idx_a)]

    if weights is not None:
        blend = np.where(paired, np.asarray(weights, dtype=np.float32), 1.0)
    else:
        blend = np.where(paired, 0.5, 1.0).astype(np.float32)
    complement = (1.0 - blend).astype(np.float32)

    mixed = primary * blend[:, None] + secondary * complement[:, None]
    return mixed.astype(np.float32, copy=False)
def _extract_openpose_keypoints(
pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
) -> np.ndarray:
@ -219,7 +187,7 @@ def _extract_openpose_keypoints(
f"missing at frame={t}, track={person_k}."
)
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
out[t_idx] = _resolve_openpose_keypoints_from_joints(
out[t_idx] = resolve_openpose_keypoints_from_joints(
joints, op18, weights=op18_w,
)
return out
@ -306,10 +274,10 @@ def _extract_openpose_hand_keypoints(
"per-frame `pred_joint_coords` for hands."
)
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
out[t_idx, :21] = _resolve_openpose_keypoints_from_joints(
out[t_idx, :21] = resolve_openpose_keypoints_from_joints(
joints, op21_r, weights=op21_r_w,
)
out[t_idx, 21:] = _resolve_openpose_keypoints_from_joints(
out[t_idx, 21:] = resolve_openpose_keypoints_from_joints(
joints, op21_l, weights=op21_l_w,
)
return out

View File

@ -16,6 +16,7 @@ from __future__ import annotations
import json
import struct
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
@ -505,9 +506,15 @@ def extract_rig_static(model: Any, pose_data: Optional[Dict[str, Any]] = None) -
# so we don't need MHR's PCA pose / expression bases.
parents = np.asarray(override["parents"], dtype=np.int32)
rest_v = np.asarray(override["rest_verts_m"], dtype=np.float32)
# BVH needs parent-relative bone OFFSETs (cm). MHR ships these directly;
# external rigs only give bind globals, so derive locals from them.
bind_global_m = np.asarray(override["bind_global_m"], dtype=np.float32)
local_bind = bone_locals_from_globals(bind_global_m[None], parents)[0]
joint_translation_offsets = (local_bind[:, :3] * 100.0).astype(np.float32)
return {
"parents": parents,
"parents_pmi": parents,
"joint_translation_offsets": joint_translation_offsets, # (NJ, 3) cm
"lbs_compact_joints": np.asarray(override["lbs_compact_joints"], dtype=np.uint16),
"lbs_compact_weights": np.asarray(override["lbs_compact_weights"], dtype=np.float32),
"lbs_compact_max_inf": int(override.get("lbs_compact_max_inf", 4)),
@ -737,6 +744,77 @@ def bind_skel_state(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> n
return global_skel_state_per_frame(model, zero_mp)[0]
@dataclass
class Rig:
    """Source-agnostic static rig description consumed by the GLB/BVH exporters.

    Build one with `Rig.from_pose_data(pose_data, model)`. The data comes either
    from an MHR model or from an inline `pose_data["_skeleton_override"]`
    supplied by an external rig (e.g. ComfyUI-Kimodo).

    Exporters read the fields below without checking where the rig came from.
    `rest_verts_m` is the one operation that differs by source: MHR rest verts
    are computed from the subject's `shape_params`, while external rigs carry
    fixed rest verts.
    """

    parents: np.ndarray  # (NJ,) int32, -1 = root
    joint_offsets_cm: np.ndarray  # (NJ, 3) parent-relative bind offsets, cm
    bind_global_cm: np.ndarray  # (NJ, 8) bind global [t cm | q xyzw | s]
    lbs_joints: np.ndarray  # (V, 8) uint16 — compacted skin influences
    lbs_weights: np.ndarray  # (V, 8) f32
    lbs_max_inf: int  # ≤ 8; lets callers skip JOINTS_1 when ≤ 4
    faces: np.ndarray  # (F, 3) uint32
    num_joints: int
    num_verts: int
    num_expr: int  # 0 = no face morphs
    per_frame_y_down: bool  # pred_joint_coords stored y-down (MHR) vs y-up (external)
    can_rerun_fk: bool  # True = per-frame FK re-runnable from mhr_model_params
    expr_basis: Optional[np.ndarray] = None  # (E, V, 3) cm — MHR only
    _model: Any = None
    _rest_override: Optional[np.ndarray] = None  # (V, 3) m — external only

    @property
    def bind_global_m(self) -> np.ndarray:
        """Float32 copy of `bind_global_cm` with the translation columns in metres."""
        in_metres = self.bind_global_cm.astype(np.float32).copy()
        # Only the first three columns are translations; the rest is left as-is.
        in_metres[:, :3] *= 0.01
        return in_metres

    def rest_verts_m(self, shape_params: np.ndarray) -> np.ndarray:
        """Zero-pose rest verts (V, 3) in rig-native Y-up metres.

        When fixed rest verts are stored they are returned directly and
        `shape_params` is unused; otherwise the call is delegated to
        `zero_pose_rest_verts` with the stored model.
        """
        fixed = self._rest_override
        if fixed is None:
            return zero_pose_rest_verts(self._model, shape_params)
        return fixed

    @classmethod
    def from_pose_data(cls, pose_data: Optional[Dict[str, Any]], model: Any = None) -> "Rig":
        """Assemble a `Rig` from whatever `extract_rig_static` reports.

        If the static rig dict is flagged `_external`, the pre-compacted skin
        arrays and the override's rest verts are used. Otherwise the sparse
        MHR skinning is compacted to at most 8 influences per vertex and the
        expression basis is kept when the rig has any expressions.
        """
        static = extract_rig_static(model, pose_data)
        is_external = bool(static.get("_external", False))

        if not is_external:
            skin_joints, skin_weights, skin_max_inf = compact_skin_to_n(
                static["lbs_skin_indices"], static["lbs_vert_indices"],
                static["lbs_skin_weights"], int(static["num_verts"]), max_inf=8,
            )
            y_down = True
            fixed_rest = None
            morph_basis = static["expr_basis"] if int(static["num_expr"]) > 0 else None
        else:
            skin_joints = np.asarray(static["lbs_compact_joints"], dtype=np.uint16)
            skin_weights = np.asarray(static["lbs_compact_weights"], dtype=np.float32)
            skin_max_inf = int(static["lbs_compact_max_inf"])
            skeleton = _get_skeleton_override(pose_data) or {}
            y_down = bool(skeleton.get("per_frame_y_down", False))
            fixed_rest = np.asarray(skeleton["rest_verts_m"], dtype=np.float32)
            morph_basis = None

        init_kwargs = {
            "parents": np.asarray(static["parents"], dtype=np.int32),
            "joint_offsets_cm": np.asarray(
                static["joint_translation_offsets"], dtype=np.float32,
            ),
            "bind_global_cm": np.asarray(
                bind_skel_state(model, pose_data), dtype=np.float32,
            ),
            "lbs_joints": skin_joints,
            "lbs_weights": skin_weights,
            "lbs_max_inf": skin_max_inf,
            "faces": np.asarray(static["faces"], dtype=np.uint32),
            "num_joints": int(static["num_joints"]),
            "num_verts": int(static["num_verts"]),
            "num_expr": int(static["num_expr"]),
            "per_frame_y_down": y_down,
            "can_rerun_fk": not is_external,
            "expr_basis": morph_basis,
            "_model": model,
            "_rest_override": fixed_rest,
        }
        return cls(**init_kwargs)
def ibp_from_bind_global(bind_skel_state_m: np.ndarray) -> np.ndarray:
"""Inverse-bind MAT4 by inverting the rig's bind global (meters). Guarantees
IBP[j] = inverse(FK over bind local TRS) — exactly what glTF skinning
@ -1069,6 +1147,87 @@ DWPOSE_HAND_COLORS_21 = np.tile(
)
def resolve_openpose_keypoints_from_joints(
    joints: np.ndarray, mapping: np.ndarray, weights: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Resolve a (K, 2) joint-index map against (J, D) joint positions -> (K, D).

    For a row (a, b): when b == -1 the result is joints[a]; when b >= 0 it is
    w*joints[a] + (1-w)*joints[b]. `w` is 0.5 (midpoint) unless `weights`
    supplies a per-row value; a value outside [0, 1] extrapolates past the
    segment. The result is float32.
    """
    first = mapping[:, 0].astype(np.int64)
    second = mapping[:, 1].astype(np.int64)
    is_pair = second >= 0
    resolved = joints[first]

    if is_pair.any():
        # Rows without a partner reuse their own joint so the lookup stays in range.
        partner = joints[np.where(is_pair, second, first)]
        if weights is None:
            w_first = np.where(is_pair, 0.5, 1.0).astype(np.float32)
        else:
            w_first = np.where(is_pair, np.asarray(weights, dtype=np.float32), 1.0)
        w_second = (1.0 - w_first).astype(np.float32)
        resolved = resolved * w_first[:, None] + partner * w_second[:, None]

    return resolved.astype(np.float32, copy=False)
# Per-part lookup table used by `openpose_render_keypoints`:
#   part -> (override map key, override weight key, MHR70 reindex map)
# The two string keys are looked up in the skeleton override dict; the third
# entry indexes the stored `pred_keypoints_{3d,2d}` array when the override
# carries no joint-index map for that part.
_OPENPOSE_RENDER_MAPS = {
"body": ("openpose18_joint_indices", "openpose18_joint_weights", OPENPOSE18_TO_MHR70),
"hand_r": ("openpose_hand21_r_joint_indices", "openpose_hand21_r_joint_weights", OPENPOSE_HAND21_TO_MHR70_R),
"hand_l": ("openpose_hand21_l_joint_indices", "openpose_hand21_l_joint_weights", OPENPOSE_HAND21_TO_MHR70_L),
}
def openpose_render_keypoints(
    person: Dict[str, Any], pose_data: Optional[Dict[str, Any]], part: str,
    *, dim: int, H: int = 0, W: int = 0,
) -> Optional[np.ndarray]:
    """OpenPose keypoints for one person, in op-layout, CAMERA frame (Y-down).

    Args:
        person: per-person frame dict. Reads `pred_joint_coords`,
            `pred_cam_t` and `focal_length` on the external-rig path, or
            `pred_keypoints_3d` / `pred_keypoints_2d` on the MHR path.
        pose_data: the enclosing pose_data; only its skeleton override (if
            any) is consulted.
        part: one of 'body', 'hand_r', 'hand_l'.
        dim: 3 -> (K, 3) metres, before `pred_cam_t` is added;
            anything else is treated as 2 -> (K, 2) image pixels.
        H, W: image size, used only to centre the 2D projection on the
            external-rig path.

    Returns:
        A float32 array of keypoints, or None when the source data is missing
        or does not have the expected shape.

    Raises:
        KeyError: if `part` is not one of the supported names.

    External rigs (override carries the joint-index map) resolve from
    per-frame `pred_joint_coords` (rig-native Y-up -> flipped to camera
    Y-down, matching the pred_vertices convention). MHR reindexes the stored
    `pred_keypoints_{3d,2d}` via the MHR70 map.
    """
    map_key, w_key, mhr_map = _OPENPOSE_RENDER_MAPS[part]
    override = _get_skeleton_override(pose_data)
    ext_map = override.get(map_key) if override is not None else None

    if ext_map is not None:
        joints = person.get("pred_joint_coords")
        if joints is None:
            return None
        w = override.get(w_key)
        kp3d = resolve_openpose_keypoints_from_joints(
            np.asarray(joints, dtype=np.float32),
            np.asarray(ext_map, dtype=np.int64),
            None if w is None else np.asarray(w, dtype=np.float32),
        ).copy()
        kp3d[:, 1] *= -1.0  # rig-native Y-up -> camera Y-down
        kp3d[:, 2] *= -1.0
        if dim == 3:
            return kp3d

        # 2D: pinhole-project the camera-space points around the image centre.
        cam_t = person.get("pred_cam_t")
        focal = person.get("focal_length")
        if cam_t is None or focal is None:
            return None
        pts3 = kp3d + np.asarray(cam_t, dtype=np.float32).reshape(1, 3)
        # Clamp depth so points at or behind the camera plane don't divide by zero.
        z = np.maximum(pts3[:, 2:3], 1e-6)
        f = float(np.asarray(focal, dtype=np.float32).reshape(-1)[0])
        xy = pts3[:, :2] * f + np.array([W * 0.5, H * 0.5], dtype=np.float32)[None, :] * z
        return (xy / z).astype(np.float32)

    key = "pred_keypoints_3d" if dim == 3 else "pred_keypoints_2d"
    kp_full = person.get(key)
    if kp_full is None:
        return None
    kp_full = np.asarray(kp_full, dtype=np.float32)
    # Require the full MHR70 layout AND the coordinate width the caller asked
    # for. Both renderers validated the trailing dimension before this helper
    # was shared; without it a wrong-width array would pass through and fail
    # later at the caller's slice assignment instead of being skipped.
    expected_cols = 3 if dim == 3 else 2
    if (
        kp_full.ndim != 2
        or kp_full.shape[0] < 70
        or kp_full.shape[1] != expected_cols
    ):
        return None
    return kp_full[mhr_map]
# Face landmarks from the MHR rig (option `face_source="rig"`).
# MHR has no face bones — face deforms via expr_params morphs — so landmarks
# are sourced from `pred_vertices` at fixed vertex IDs picked by NN against

View File

@ -24,14 +24,12 @@ import numpy as np
from .glb_shared import (
GLBWriter,
Rig,
bake_vertex_colors,
bind_skel_state,
bone_locals_from_globals,
collect_tracks,
compact_skin_to_n,
compute_normals,
compute_pastel_mix,
extract_rig_static,
flat_shade_mesh,
gaussian_smooth_quats,
global_skel_state_from_pose_data,
@ -41,7 +39,6 @@ from .glb_shared import (
quat_sign_fix_per_joint,
rotation_align,
unflip,
zero_pose_rest_verts,
)
from comfy_extras.sam3d_body.utils import jet_colormap
@ -220,30 +217,22 @@ def build_glb_skeletal(
if not tracks:
raise ValueError("build_glb_skeletal: no valid tracks in pose_data")
rig_static = extract_rig_static(model, pose_data)
NJ = rig_static["num_joints"]
NV = rig_static["num_verts"]
NEXPR = rig_static["num_expr"]
parents = rig_static["parents"]
is_external = bool(rig_static.get("_external", False))
if is_external:
rig = Rig.from_pose_data(pose_data, model)
NJ = rig.num_joints
NV = rig.num_verts
NEXPR = rig.num_expr
parents = rig.parents
if not rig.can_rerun_fk:
# External rigs have no PCA pose params to re-run; only stored globals
# are available, and kimodo stores joint coords already Y-up.
# are available, and they store joint coords already Y-up.
use_stored_global_rots = True
joint_coords_y_down = not is_external
# Compact sparse skinning to 8 influences per vertex into glTF's two
# JOINTS_*/WEIGHTS_* sets. MHR averages ~2.8 influences/vert but some
# shoulder/hip verts have 5-8 where multiple joints cancel — keeping only
# 4 there leaks per-bone rotation noise into the rendered mesh.
if is_external:
joints_8 = rig_static["lbs_compact_joints"]
weights_8 = rig_static["lbs_compact_weights"]
actual_max_inf = rig_static["lbs_compact_max_inf"]
else:
joints_8, weights_8, actual_max_inf = compact_skin_to_n(
rig_static["lbs_skin_indices"], rig_static["lbs_vert_indices"],
rig_static["lbs_skin_weights"], NV, max_inf=8,
)
joint_coords_y_down = rig.per_frame_y_down
# Skinning is already compacted to ≤8 influences per vertex (MHR averages
# ~2.8 but some shoulder/hip verts hit 5-8; keeping only 4 there leaks
# per-bone rotation noise into the rendered mesh).
joints_8 = rig.lbs_joints
weights_8 = rig.lbs_weights
actual_max_inf = rig.lbs_max_inf
joints_set0 = np.ascontiguousarray(joints_8[:, :4])
weights_set0 = np.ascontiguousarray(weights_8[:, :4])
use_set1 = actual_max_inf > 4
@ -252,10 +241,8 @@ def build_glb_skeletal(
# Derive bone locals from the rig's bind globals rather than recomputing
# FK ourselves, so any mismatch between `parents` and the rig's actual FK
# is absorbed into the local TRS instead of producing wrong globals.
bind_global_cm = bind_skel_state(model, pose_data)
bind_global_m = bind_global_cm.copy().astype(np.float32)
bind_global_m[:, :3] *= 0.01
bind_local = bone_locals_from_globals(bind_global_m[None], rig_static["parents"])[0]
bind_global_m = rig.bind_global_m
bind_local = bone_locals_from_globals(bind_global_m[None], parents)[0]
# IBP = inverse of bind global. With bone defaults set to bind_local and
# FK composed via `parents`, skin_matrix at rest = identity.
@ -280,7 +267,7 @@ def build_glb_skeletal(
expr_morph_accs: List[int] = []
if include_face_morphs and NEXPR > 0:
eb = rig_static["expr_basis"].astype(np.float32) * 0.01
eb = rig.expr_basis.astype(np.float32) * 0.01
for e in range(NEXPR):
expr_morph_accs.append(w.add_vec3_f32_no_minmax(eb[e]))
@ -329,16 +316,14 @@ def build_glb_skeletal(
body_mesh_node_idx: Optional[int] = None
if include_body:
# External rigs have no PCA shape — `zero_pose_rest_verts` short-
# circuits to `pose_data["_skeleton_override"]["rest_verts_m"]`,
# so zeroed shape_params is safe there.
if is_external:
shape_params_arr = np.zeros(0, dtype=np.float32)
else:
shape_params_arr = np.asarray(
frames[frame_indices[0]][person_k]["shape_params"], dtype=np.float32,
)
rest_v = zero_pose_rest_verts(model, shape_params_arr, pose_data=pose_data)
# MHR rest verts depend on the subject's shape_params; external rigs
# ship fixed rest verts and ignore the arg (so the empty external
# `shape_params` is harmless).
shape_params_arr = np.asarray(
frames[frame_indices[0]][person_k].get("shape_params", []),
dtype=np.float32,
)
rest_v = rig.rest_verts_m(shape_params_arr)
normals = compute_normals(rest_v, faces_native)
positions_acc = w.add_vec3_f32(rest_v)
normals_acc = w.add_vec3_f32(normals)
@ -393,7 +378,7 @@ def build_glb_skeletal(
color_idx_per_vert: Optional[np.ndarray] = None
hw = float(bone_vis_radius_m)
bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh(
bind_global_m[:, :3], rig_static["parents"], half_width_m=hw,
bind_global_m[:, :3], parents, half_width_m=hw,
)
if bv_v.shape[0] > 0:
F = bv_f.shape[0]
@ -458,7 +443,7 @@ def build_glb_skeletal(
# local translation (t_local inherits parent sign via q_parent_inv)
# and produces visible "axis resets" mid-animation.
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7])
bone_local_anim = bone_locals_from_globals(rig_global_m, rig_static["parents"])
bone_local_anim = bone_locals_from_globals(rig_global_m, parents)
local_t = bone_local_anim[..., :3].astype(np.float32)
local_q = bone_local_anim[..., 3:7].astype(np.float32)
local_s = bone_local_anim[..., 7].astype(np.float32)

View File

@ -19,10 +19,8 @@ from PIL import Image
from comfy_extras.pose.keypoint_draw import KeypointDraw
from .glb_shared import (
OPENPOSE18_TO_MHR70,
OPENPOSE_HAND21_TO_MHR70_L,
OPENPOSE_HAND21_TO_MHR70_R,
OPENPOSE_HAND_COLORS_21,
openpose_render_keypoints,
select_face_landmark_vert_ids,
)
@ -53,32 +51,31 @@ def _project_face_landmarks_2d(
def _pack_dwpose_134(
person: Dict[str, Any], *, include_body: bool, include_hands: bool,
person: Dict[str, Any], pose_data: Dict[str, Any], *,
include_body: bool, include_hands: bool, H: int, W: int,
) -> Tuple[np.ndarray, np.ndarray]:
"""Pack a SAM3D person dict into (kp, scores): (134, 2) DWPose-layout
coords + (134,) confidence. Face slot (24-91) is left zeroed; face dots
are drawn separately so SAM3D's 238-sapiens / rig-fallback counts work.
Non-finite or out-of-band entries get score=0 and are filtered downstream."""
Non-finite or out-of-band entries get score=0 and are filtered downstream.
Keypoints come from the shared provider: MHR reindexes `pred_keypoints_2d`,
external rigs (Kimodo) resolve + project from `pred_joint_coords`."""
kp = np.zeros((134, 2), dtype=np.float32)
scores = np.zeros(134, dtype=np.float32)
kp2d_full = person.get("pred_keypoints_2d")
if kp2d_full is None:
return kp, scores
kp2d = np.asarray(kp2d_full, dtype=np.float32)
if kp2d.ndim != 2 or kp2d.shape[1] != 2 or kp2d.shape[0] < 70:
return kp, scores
if include_body:
body_xy = kp2d[OPENPOSE18_TO_MHR70]
finite = np.isfinite(body_xy).all(axis=1)
kp[:18][finite] = body_xy[finite]
scores[:18][finite] = 1.0
body_xy = openpose_render_keypoints(person, pose_data, "body", dim=2, H=H, W=W)
if body_xy is not None:
finite = np.isfinite(body_xy).all(axis=1)
kp[:18][finite] = body_xy[finite]
scores[:18][finite] = 1.0
if include_hands:
for slot_start, mhr_idx in ((92, OPENPOSE_HAND21_TO_MHR70_R),
(113, OPENPOSE_HAND21_TO_MHR70_L)):
hand_xy = kp2d[mhr_idx]
for slot_start, part in ((92, "hand_r"), (113, "hand_l")):
hand_xy = openpose_render_keypoints(person, pose_data, part, dim=2, H=H, W=W)
if hand_xy is None:
continue
finite = np.isfinite(hand_xy).all(axis=1)
kp[slot_start:slot_start + 21][finite] = hand_xy[finite]
scores[slot_start:slot_start + 21][finite] = 1.0
@ -190,7 +187,8 @@ def render_pose_data_openpose(
pre = canvas.copy() if pastel > 0 else None
kp134, scores134 = _pack_dwpose_134(
person, include_body=include_body, include_hands=include_hands,
person, pose_data, include_body=include_body,
include_hands=include_hands, H=H, W=W,
)
_KD.draw_wholebody_keypoints(
canvas, kp134, scores=scores134, threshold=0.5,

View File

@ -178,6 +178,12 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str,
center = np.array([W * 0.5, H * 0.5], dtype=np.float32)
reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"}
# External rigs (e.g. Kimodo) store pred_joint_coords rig-native Y-up; the
# render openpose/scail keypoint provider resolves from them and flips Y/Z.
# Transform them through the camera too (in camera space, then back to Y-up)
# so those keypoints follow the override instead of staying in the old frame.
override = mhr_pose_data.get("_skeleton_override")
joints_y_up = override is not None and not bool(override.get("per_frame_y_down", False))
new_frames: List[List[Dict[str, Any]]] = []
for frame in mhr_pose_data["frames"]:
scaled = []
@ -197,6 +203,17 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str,
if k in reproj: # re-project the new 3D to 2D image coords
z = np.maximum(cam[..., 2:3], 1e-6)
p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32)
jc = p.get("pred_joint_coords")
if jc is not None:
jc = np.asarray(jc, dtype=np.float32).copy()
if joints_y_up:
jc[..., 1] *= -1.0
jc[..., 2] *= -1.0
jc = (jc + cam_t - eye) @ R.T
if joints_y_up:
jc[..., 1] *= -1.0
jc[..., 2] *= -1.0
p["pred_joint_coords"] = jc.astype(np.float32)
p["pred_cam_t"] = np.zeros(3, dtype=np.float32)
p["focal_length"] = np.array(new_focal, dtype=np.float32)
scaled.append(p)