From d75497717847e4f08a2b44701f28900a9bb8fd39 Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Tue, 16 Jun 2026 18:22:08 +0300 Subject: [PATCH] Align to Kimodo --- comfy_extras/nodes_sam3d_body.py | 41 +++-- comfy_extras/nodes_save_3d.py | 9 +- comfy_extras/sam3d_body/export/bvh.py | 34 ++-- comfy_extras/sam3d_body/export/capsules.py | 28 ++- .../sam3d_body/export/glb_openpose.py | 46 +---- comfy_extras/sam3d_body/export/glb_shared.py | 159 ++++++++++++++++++ .../sam3d_body/export/glb_skeletal.py | 71 +++----- comfy_extras/sam3d_body/export/openpose_2d.py | 38 ++--- comfy_extras/sam3d_body/utils.py | 17 ++ 9 files changed, 296 insertions(+), 147 deletions(-) diff --git a/comfy_extras/nodes_sam3d_body.py b/comfy_extras/nodes_sam3d_body.py index ff4328de0..00dbafa61 100644 --- a/comfy_extras/nodes_sam3d_body.py +++ b/comfy_extras/nodes_sam3d_body.py @@ -34,6 +34,7 @@ from comfy_extras.sam3d_body.utils import image_to_uint8 SAM3TrackData = io.Custom("SAM3_TRACK_DATA") MHRPoseData = io.Custom("MHR_POSE_DATA") # mhr_model_params, shape_params, expr_params, MHR70 keypoint layout, canonical_colors keyed to MHR mesh, hand_vert_mask from MHR LBS). +KimodoPoseData = io.Custom("KIMODO_POSE_DATA") # external Y-up rig (ComfyUI-Kimodo); carries per-frame pred_vertices/pred_cam_t/canonical_colors so the mesh rasterizer is rig-agnostic. SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL") # Loader @@ -827,10 +828,18 @@ class SAM3DBody_Render(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SAM3DBody_Render", - display_name="Render SAM3D Body", + display_name="Render 3D Body Pose", + search_aliases=["Render SAM3D Body", "sam3d render", "kimodo render"], category="image/detection", inputs=[ - MHRPoseData.Input("mhr_pose_data"), + io.MultiType.Input( + "pose_data", types=[MHRPoseData, KimodoPoseData], + tooltip=( + "MHR pose data, or external Y-up rig pose data (KimodoSample). 
" + "All render styles work for external rigs that carry OpenPose " + "joint maps in their _skeleton_override (KimodoSample does)." + ), + ), io.Image.Input( "background", optional=True, @@ -882,11 +891,11 @@ class SAM3DBody_Render(io.ComfyNode): @classmethod - def execute(cls, mhr_pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput: + def execute(cls, pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput: render_style = render_style or {"render_style": "mesh"} mode_key = render_style.get("render_style", "mesh") - native_H, native_W = mhr_pose_data["image_size"] + native_H, native_W = pose_data["image_size"] new_W, new_H = int(width), int(height) if new_W == 0 and new_H == 0: H, W = native_H, native_W @@ -896,14 +905,14 @@ class SAM3DBody_Render(io.ComfyNode): new_W = max(1, round(native_W * new_H / native_H)) elif new_H == 0: new_H = max(1, round(native_H * new_W / native_W)) - mhr_pose_data = _scale_pose_data(mhr_pose_data, new_H, new_W) + pose_data = _scale_pose_data(pose_data, new_H, new_W) H, W = new_H, new_W px_scale = min(new_W / native_W, new_H / native_H) if camera_info is not None: - mhr_pose_data = apply_camera_override(mhr_pose_data, camera_info, H, W) + pose_data = apply_camera_override(pose_data, camera_info, H, W) - B = len(mhr_pose_data["frames"]) + B = len(pose_data["frames"]) if B == 0: return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32)) @@ -951,11 +960,11 @@ class SAM3DBody_Render(io.ComfyNode): region = str(render_style.get("region", "full_body")) if region == "hands_only": - hand_mask = mhr_pose_data["hand_vert_mask"] - faces_full = np.asarray(mhr_pose_data["faces"]) + hand_mask = pose_data["hand_vert_mask"] + faces_full = np.asarray(pose_data["faces"]) keep = hand_mask[faces_full].all(axis=1) - mhr_pose_data = dict(mhr_pose_data) - mhr_pose_data["faces"] = np.ascontiguousarray( + pose_data = dict(pose_data) + pose_data["faces"] = 
np.ascontiguousarray( faces_full[keep], dtype=faces_full.dtype, ) else: # silhouette — no shader/opacity controls, mask is binary @@ -980,7 +989,7 @@ class SAM3DBody_Render(io.ComfyNode): bg_f = bg_t[min(f, bg_t.shape[0] - 1)] if mode_key == "openpose_2d": img = render_pose_data_openpose( - mhr_pose_data, frame_idx=f, W=W, H=H, + pose_data, frame_idx=f, W=W, H=H, background=bg_f, composite=composite, marker_radius_px=marker_radius_px, @@ -993,7 +1002,7 @@ class SAM3DBody_Render(io.ComfyNode): ) elif mode_key == "openpose_3d": img = render_pose_data_capsules( - mhr_pose_data, frame_idx=f, W=W, H=H, + pose_data, frame_idx=f, W=W, H=H, background=bg_f, composite=composite, radius_m=op3d_radius_m, @@ -1005,7 +1014,7 @@ class SAM3DBody_Render(io.ComfyNode): elif mode_key == "scail": # SCAIL renders body as 3D capsules + 2D openpose hands on top img = render_pose_data_capsules( - mhr_pose_data, frame_idx=f, W=W, H=H, + pose_data, frame_idx=f, W=W, H=H, background=bg_f, composite=composite, radius_m=cap_radius_m, @@ -1017,7 +1026,7 @@ class SAM3DBody_Render(io.ComfyNode): scail_overlay_px = max(1, int(round(4 * px_scale))) scail_face_px = max(1, int(round(1 * px_scale))) img = render_pose_data_openpose( - mhr_pose_data, frame_idx=f, W=W, H=H, + pose_data, frame_idx=f, W=W, H=H, background=img, composite="over", include_body=False, @@ -1031,7 +1040,7 @@ class SAM3DBody_Render(io.ComfyNode): ) else: img = render_pose_data( - mhr_pose_data, frame_idx=f, W=W, H=H, + pose_data, frame_idx=f, W=W, H=H, background=bg_f, composite=composite, opacity=opacity, shader_preset=shader_key, rainbow_tilt_x_deg=rainbow_tilt_x, diff --git a/comfy_extras/nodes_save_3d.py b/comfy_extras/nodes_save_3d.py index 05454242b..80103908b 100644 --- a/comfy_extras/nodes_save_3d.py +++ b/comfy_extras/nodes_save_3d.py @@ -684,8 +684,13 @@ class BuildPoseFile(IO.ComfyNode): fmt = format.get("format", "glb") if fmt == "bvh": - if sam3d_body_model is None: - raise ValueError("Create 3D Animation: 'bvh' 
format needs the `sam3d_body_model` input.") + # External rigs (e.g. Kimodo) supply pose_data["_skeleton_override"] + has_external_rig = isinstance(pose_data, dict) and ("_skeleton_override" in pose_data) + if sam3d_body_model is None and not has_external_rig: + raise ValueError( + "Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input OR a " + "`_skeleton_override` dict in pose_data (e.g. from KimodoSample)." + ) # BVH carries one skeleton; -1 (all tracks) collapses to the first. ti = int(track_index) if ti < 0: diff --git a/comfy_extras/sam3d_body/export/bvh.py b/comfy_extras/sam3d_body/export/bvh.py index 906631977..e690a7b86 100644 --- a/comfy_extras/sam3d_body/export/bvh.py +++ b/comfy_extras/sam3d_body/export/bvh.py @@ -16,10 +16,9 @@ from typing import Any, Dict, List import numpy as np from .glb_shared import ( - bind_skel_state, + Rig, bone_locals_from_globals, collect_tracks, - extract_rig_static, global_skel_state_from_pose_data, quat_sign_fix_per_joint, unflip, @@ -49,9 +48,14 @@ def _quat_to_zxy_euler_deg(quat: np.ndarray) -> np.ndarray: return out.astype(np.float32) -def _find_bvh_root(parents: np.ndarray) -> int: +def _find_bvh_root(parents: np.ndarray, is_external: bool = False) -> int: """First child of the rig's world anchor so the static origin→body stick - bone gets left out. Falls back to the first root joint.""" + bone gets left out. Falls back to the first root joint. + + MHR's joint 0 is a static world anchor whose single child is the pelvis, so + skipping it is correct. External rigs (e.g. 
SOMA-77) whose root is already + the articulated body root with multiple child chains must keep the root — + descending into one child would drop the sibling limbs from the BVH.""" NJ = parents.shape[0] world_anchors = [j for j in range(NJ) if not (0 <= int(parents[j]) < NJ and int(parents[j]) != j)] @@ -64,6 +68,8 @@ def _find_bvh_root(parents: np.ndarray) -> int: children[p].append(j) wa = world_anchors[0] if children[wa]: + if is_external and len(children[wa]) > 1: + return wa return children[wa][0] return wa @@ -80,7 +86,7 @@ def _build_children_map(parents: np.ndarray) -> List[List[int]]: def build_bvh( pose_data: Dict[str, Any], - model: Any, + model: Any = None, *, fps: float = 24.0, camera_translation: str = "off", @@ -89,6 +95,10 @@ def build_bvh( ) -> bytes: """Build a BVH file from pose_data. Returns UTF-8 encoded text bytes. + `model` may be None when pose_data carries a `_skeleton_override` (external + rigs, e.g. Kimodo); the rig hierarchy/offsets/bind are read from the + override instead of the MHR model. + `units` is "cm" (default, standard mocap convention) or "m". Affects the OFFSET and root-position values; rotations are independent of units. 
""" @@ -96,9 +106,10 @@ def build_bvh( raise ValueError(f"build_bvh: units must be 'cm' or 'm', got {units!r}") unit_scale = 100.0 if units == "cm" else 1.0 - rig_static = extract_rig_static(model) - NJ = int(rig_static["num_joints"]) - parents = rig_static["parents"] + rig = Rig.from_pose_data(pose_data, model) + is_external = not rig.can_rerun_fk + NJ = rig.num_joints + parents = rig.parents frames = pose_data["frames"] tracks = collect_tracks(pose_data, track_index) @@ -109,16 +120,16 @@ def build_bvh( if n_frames == 0: raise ValueError("build_bvh: track has zero frames") - body_root = _find_bvh_root(parents) + body_root = _find_bvh_root(parents, is_external) children_map = _build_children_map(parents) # Bone OFFSETs come from MHR's translation_offsets (joint position # relative to parent in parent's local-bind frame). For the BVH root, # we use its bind world position so the skeleton sits at the right # spot when imported. - bind_global = bind_skel_state(model) # (NJ, 8) cm + bind_global = rig.bind_global_cm # (NJ, 8) cm bind_pos_m = bind_global[:, :3].astype(np.float64) * 0.01 # (NJ, 3) m - offset_m = rig_static["joint_translation_offsets"].astype(np.float64) * 0.01 + offset_m = rig.joint_offsets_cm.astype(np.float64) * 0.01 # DFS order rooted at body_root — matches per-frame channel order. bvh_order: List[int] = [] @@ -133,6 +144,7 @@ def build_bvh( # treated as the hierarchy root in BVH-space. 
rig_global_m = global_skel_state_from_pose_data( pose_data, frame_indices, person_k, NJ, + joint_coords_y_down=rig.per_frame_y_down, ) rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7]) bvh_parents = parents.copy() diff --git a/comfy_extras/sam3d_body/export/capsules.py b/comfy_extras/sam3d_body/export/capsules.py index 1a3d85b32..50fe33a5f 100644 --- a/comfy_extras/sam3d_body/export/capsules.py +++ b/comfy_extras/sam3d_body/export/capsules.py @@ -18,13 +18,11 @@ import comfy.model_management from .glb_shared import ( OPENPOSE_18_PAIRS, - OPENPOSE18_TO_MHR70, OPENPOSE_RAINBOW_18, SCAIL_LIMB_COLORS_17, OPENPOSE_HAND_PAIRS, - OPENPOSE_HAND21_TO_MHR70_R, - OPENPOSE_HAND21_TO_MHR70_L, OPENPOSE_HAND_COLORS_21, + openpose_render_keypoints, ) @@ -37,6 +35,7 @@ def _limb_palette_rgb01(palette: str) -> np.ndarray: def _build_specs_from_pose( persons: List[Dict[str, Any]], + pose_data: Dict[str, Any], *, include_hands: bool, palette: str, @@ -61,17 +60,14 @@ def _build_specs_from_pose( falloff = max(0.0, min(1.0, float(person_brightness_falloff))) for k, person in enumerate(persons): - kp2d_full = person.get("pred_keypoints_3d") cam_t = person.get("pred_cam_t") - if kp2d_full is None or cam_t is None: - continue - kp = np.asarray(kp2d_full, dtype=np.float32) - if kp.ndim != 2 or kp.shape[1] != 3 or kp.shape[0] < 70: + body_op = openpose_render_keypoints(person, pose_data, "body", dim=3) + if body_op is None or cam_t is None: continue cam_t_np = np.asarray(cam_t, dtype=np.float32).reshape(3) - # pred_keypoints_3d is camera frame (Y-down post-flip); add cam_t to - # place the subject in front of the camera. - kp_cam = kp + cam_t_np[None, :] + # op-keypoints are camera frame (Y-down); add cam_t to place the + # subject in front of the camera. 
+ body_kp = body_op + cam_t_np[None, :] pastel = 0.0 if k == 0 else (1.0 - falloff ** k) @@ -83,7 +79,6 @@ def _build_specs_from_pose( # SCAIL skips face bones (13..16) and redirects limb 12 into a short # head stub blending spine + neck→nose direction. body_limb_count = 13 if palette == "scail" else len(OPENPOSE_18_PAIRS) - body_kp = kp_cam[OPENPOSE18_TO_MHR70] spine_dir = None if palette == "scail": mid_hip = 0.5 * (body_kp[8] + body_kp[11]) # 8=RHip, 11=LHip @@ -117,10 +112,11 @@ def _build_specs_from_pose( dtype=np.float32)) if include_hands: - r_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_R] - l_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_L] + hand_ops = [openpose_render_keypoints(person, pose_data, p, dim=3) + for p in ("hand_r", "hand_l")] + hand_kps = [h + cam_t_np[None, :] for h in hand_ops if h is not None] for limb_i, (a, b) in enumerate(OPENPOSE_HAND_PAIRS): - for hand_kp in (r_kp, l_kp): + for hand_kp in hand_kps: sa, sb = hand_kp[a], hand_kp[b] if not (np.all(np.isfinite(sa)) and np.all(np.isfinite(sb))): continue @@ -380,7 +376,7 @@ def render_pose_data_capsules( cx, cy = W * 0.5, H * 0.5 starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose( - persons, include_hands=include_hands, palette=palette, + persons, pose_data, include_hands=include_hands, palette=palette, person_brightness_falloff=person_brightness_falloff, ) diff --git a/comfy_extras/sam3d_body/export/glb_openpose.py b/comfy_extras/sam3d_body/export/glb_openpose.py index b1e456d53..666aab69e 100644 --- a/comfy_extras/sam3d_body/export/glb_openpose.py +++ b/comfy_extras/sam3d_body/export/glb_openpose.py @@ -40,6 +40,7 @@ from .glb_shared import ( gaussian_smooth_positions, make_lit_material, quat_sign_fix_per_joint, + resolve_openpose_keypoints_from_joints, rotation_align, rotmat_to_quat_np, select_face_landmark_vert_ids, @@ -109,7 +110,7 @@ def _openpose_bind_at_rig_rest( rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3] op18_w = 
override.get("openpose18_joint_weights") parts: List[np.ndarray] = [ - _resolve_openpose_keypoints_from_joints( + resolve_openpose_keypoints_from_joints( rest_pos, np.asarray(op18, dtype=np.int64), weights=None if op18_w is None else np.asarray(op18_w, dtype=np.float32), ) @@ -121,11 +122,11 @@ def _openpose_bind_at_rig_rest( return None op21_r_w = override.get("openpose_hand21_r_joint_weights") op21_l_w = override.get("openpose_hand21_l_joint_weights") - parts.append(_resolve_openpose_keypoints_from_joints( + parts.append(resolve_openpose_keypoints_from_joints( rest_pos, np.asarray(op21_r, dtype=np.int64), weights=None if op21_r_w is None else np.asarray(op21_r_w, dtype=np.float32), )) - parts.append(_resolve_openpose_keypoints_from_joints( + parts.append(resolve_openpose_keypoints_from_joints( rest_pos, np.asarray(op21_l, dtype=np.int64), weights=None if op21_l_w is None else np.asarray(op21_l_w, dtype=np.float32), )) @@ -137,39 +138,6 @@ def _openpose_bind_at_rig_rest( return np.concatenate(parts, axis=0).astype(np.float32) -def _resolve_openpose_keypoints_from_joints( - joints: np.ndarray, mapping: np.ndarray, - weights: Optional[np.ndarray] = None, -) -> np.ndarray: - """Resolve a `(K, 2)` joint-index → keypoint mapping against a per-frame - `(J, 3)` joint-position array. - - Row `(a, b)` with `b == -1` uses `joints[a]` directly (any weight ignored). - Row `(a, b)` with `b >= 0` returns `w * joints[a] + (1 - w) * joints[b]`: - - default (weights=None): `w = 0.5` → plain midpoint, useful for - keypoints that genuinely lie between two joints (Nose ≈ midpoint of - eyes). - - explicit `w` outside [0, 1] EXTRAPOLATES past the line segment, which - is how we approximate landmarks that have no rig joint AND no - in-between joint pair (Ears ≈ RightEye + (RightEye − LeftEye), i.e. 
- `w_a = 2.0` along the eye→ear axis).""" - a = mapping[:, 0].astype(np.int64) - b = mapping[:, 1].astype(np.int64) - pos_a = joints[a] - has_b = b >= 0 - if not has_b.any(): - return pos_a.astype(np.float32, copy=False) - b_safe = np.where(has_b, b, a) - pos_b = joints[b_safe] - if weights is None: - w_a = np.where(has_b, 0.5, 1.0).astype(np.float32) - else: - w_a = np.where(has_b, np.asarray(weights, dtype=np.float32), 1.0) - w_b = (1.0 - w_a).astype(np.float32) - out = pos_a * w_a[:, None] + pos_b * w_b[:, None] - return out.astype(np.float32, copy=False) - - def _extract_openpose_keypoints( pose_data: Dict[str, Any], frame_indices: List[int], person_k: int, ) -> np.ndarray: @@ -219,7 +187,7 @@ def _extract_openpose_keypoints( f"missing at frame={t}, track={person_k}." ) joints = np.asarray(person["pred_joint_coords"], dtype=np.float32) - out[t_idx] = _resolve_openpose_keypoints_from_joints( + out[t_idx] = resolve_openpose_keypoints_from_joints( joints, op18, weights=op18_w, ) return out @@ -306,10 +274,10 @@ def _extract_openpose_hand_keypoints( "per-frame `pred_joint_coords` for hands." 
) joints = np.asarray(person["pred_joint_coords"], dtype=np.float32) - out[t_idx, :21] = _resolve_openpose_keypoints_from_joints( + out[t_idx, :21] = resolve_openpose_keypoints_from_joints( joints, op21_r, weights=op21_r_w, ) - out[t_idx, 21:] = _resolve_openpose_keypoints_from_joints( + out[t_idx, 21:] = resolve_openpose_keypoints_from_joints( joints, op21_l, weights=op21_l_w, ) return out diff --git a/comfy_extras/sam3d_body/export/glb_shared.py b/comfy_extras/sam3d_body/export/glb_shared.py index 0935fe7f2..374c7b64c 100644 --- a/comfy_extras/sam3d_body/export/glb_shared.py +++ b/comfy_extras/sam3d_body/export/glb_shared.py @@ -16,6 +16,7 @@ from __future__ import annotations import json import struct +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple import numpy as np @@ -505,9 +506,15 @@ def extract_rig_static(model: Any, pose_data: Optional[Dict[str, Any]] = None) - # so we don't need MHR's PCA pose / expression bases. parents = np.asarray(override["parents"], dtype=np.int32) rest_v = np.asarray(override["rest_verts_m"], dtype=np.float32) + # BVH needs parent-relative bone OFFSETs (cm). MHR ships these directly; + # external rigs only give bind globals, so derive locals from them. 
@dataclass
class Rig:
    """Normalized static rig for the GLB/BVH exporters, independent of where it
    came from: an MHR model (`Rig.from_pose_data(pose_data, model)`) or an inline
    `pose_data["_skeleton_override"]` (external rigs, e.g. ComfyUI-Kimodo).

    Consumers read these fields and never branch on the source. The only
    source-dependent operation is `rest_verts_m` — MHR rest verts depend on the
    subject's `shape_params`; external rigs ship fixed rest verts.
    """
    parents: np.ndarray              # (NJ,) int32, -1 = root
    joint_offsets_cm: np.ndarray     # (NJ, 3) parent-relative bind offsets, cm
    bind_global_cm: np.ndarray       # (NJ, 8) bind global [t cm | q xyzw | s]
    lbs_joints: np.ndarray           # (V, 8) uint16 — compacted skin influences
    lbs_weights: np.ndarray          # (V, 8) f32
    lbs_max_inf: int                 # ≤ 8; lets callers skip JOINTS_1 when ≤ 4
    faces: np.ndarray                # (F, 3) uint32
    num_joints: int
    num_verts: int
    num_expr: int                    # 0 = no face morphs
    per_frame_y_down: bool           # pred_joint_coords stored y-down (MHR) vs y-up (external)
    can_rerun_fk: bool               # True = per-frame FK re-runnable from mhr_model_params
    expr_basis: Optional[np.ndarray] = None      # (E, V, 3) cm — MHR only
    _model: Any = None
    _rest_override: Optional[np.ndarray] = None  # (V, 3) m — external only

    @property
    def bind_global_m(self) -> np.ndarray:
        """Return a float32 copy of `bind_global_cm` with the translation
        columns (first three) scaled from centimetres to metres."""
        # `astype` already returns a fresh array, so the stored cm state is
        # never mutated by the in-place scale below.
        bind_m = self.bind_global_cm.astype(np.float32)
        bind_m[:, :3] *= 0.01
        return bind_m

    def rest_verts_m(self, shape_params: np.ndarray) -> np.ndarray:
        """Zero-pose rest verts (V, 3) in rig-native Y-up metres.

        When the rig carries `_rest_override` (external rigs) that array is
        returned as-is and `shape_params` is ignored; otherwise the rest verts
        are computed by `zero_pose_rest_verts(self._model, shape_params)`.
        """
        if self._rest_override is not None:
            return self._rest_override
        return zero_pose_rest_verts(self._model, shape_params)

    @classmethod
    def from_pose_data(cls, pose_data: Optional[Dict[str, Any]], model: Any = None) -> "Rig":
        """Build a `Rig` from `extract_rig_static(model, pose_data)`.

        If the static dict is flagged `_external`, the pre-compacted skin
        arrays and `rest_verts_m` are taken from the skeleton override and
        `per_frame_y_down` defaults to False. Otherwise the sparse MHR skin is
        compacted to at most 8 influences per vertex, `per_frame_y_down` is
        True, and `expr_basis` is kept only when `num_expr > 0`.
        """
        rs = extract_rig_static(model, pose_data)
        external = bool(rs.get("_external", False))
        if external:
            joints8 = np.asarray(rs["lbs_compact_joints"], dtype=np.uint16)
            weights8 = np.asarray(rs["lbs_compact_weights"], dtype=np.float32)
            max_inf = int(rs["lbs_compact_max_inf"])
            override = _get_skeleton_override(pose_data) or {}
            per_y_down = bool(override.get("per_frame_y_down", False))
            rest_override = np.asarray(override["rest_verts_m"], dtype=np.float32)
            expr_basis = None
        else:
            joints8, weights8, max_inf = compact_skin_to_n(
                rs["lbs_skin_indices"], rs["lbs_vert_indices"],
                rs["lbs_skin_weights"], int(rs["num_verts"]), max_inf=8,
            )
            per_y_down = True
            rest_override = None
            expr_basis = rs["expr_basis"] if int(rs["num_expr"]) > 0 else None
        return cls(
            parents=np.asarray(rs["parents"], dtype=np.int32),
            joint_offsets_cm=np.asarray(rs["joint_translation_offsets"], dtype=np.float32),
            bind_global_cm=np.asarray(bind_skel_state(model, pose_data), dtype=np.float32),
            lbs_joints=joints8, lbs_weights=weights8, lbs_max_inf=max_inf,
            faces=np.asarray(rs["faces"], dtype=np.uint32),
            num_joints=int(rs["num_joints"]), num_verts=int(rs["num_verts"]),
            num_expr=int(rs["num_expr"]),
            per_frame_y_down=per_y_down, can_rerun_fk=not external,
            expr_basis=expr_basis, _model=model, _rest_override=rest_override,
        )


def resolve_openpose_keypoints_from_joints(
    joints: np.ndarray, mapping: np.ndarray, weights: Optional[np.ndarray] = None,
) -> np.ndarray:
    """(K, 2) joint-index map resolved against (J, D) joint positions -> (K, D).

    Row (a, b): b == -1 uses joints[a]; b >= 0 returns w*joints[a]+(1-w)*joints[b]
    (w defaults 0.5 = midpoint; w outside [0, 1] extrapolates past the segment).
    A supplied weight is ignored on rows where b == -1. The result is float32.
    """
    a = mapping[:, 0].astype(np.int64)
    b = mapping[:, 1].astype(np.int64)
    pos_a = joints[a]
    has_b = b >= 0
    if not has_b.any():
        # Pure one-to-one mapping: nothing to blend.
        return pos_a.astype(np.float32, copy=False)
    # Rows without a second joint index themselves; their w_b is 0 below, so
    # the placeholder position contributes nothing.
    b_safe = np.where(has_b, b, a)
    pos_b = joints[b_safe]
    if weights is None:
        w_a = np.where(has_b, 0.5, 1.0).astype(np.float32)
    else:
        w_a = np.where(has_b, np.asarray(weights, dtype=np.float32), 1.0)
    w_b = (1.0 - w_a).astype(np.float32)
    out = pos_a * w_a[:, None] + pos_b * w_b[:, None]
    return out.astype(np.float32, copy=False)


# part -> (override map key, override weight key, MHR70 reindex map)
_OPENPOSE_RENDER_MAPS = {
    "body": ("openpose18_joint_indices", "openpose18_joint_weights", OPENPOSE18_TO_MHR70),
    "hand_r": ("openpose_hand21_r_joint_indices", "openpose_hand21_r_joint_weights", OPENPOSE_HAND21_TO_MHR70_R),
    "hand_l": ("openpose_hand21_l_joint_indices", "openpose_hand21_l_joint_weights", OPENPOSE_HAND21_TO_MHR70_L),
}


def openpose_render_keypoints(
    person: Dict[str, Any], pose_data: Optional[Dict[str, Any]], part: str,
    *, dim: int, H: int = 0, W: int = 0,
) -> Optional[np.ndarray]:
    """OpenPose keypoints for one person, in op-layout, CAMERA frame (Y-down).

    `part` in {'body','hand_r','hand_l'} (any other value raises KeyError).
    dim=3 -> (K, 3) metres pre-cam_t-add; dim=2 -> (K, 2) image pixels
    (`H`/`W` give the image size used for the principal point).
    Returns None when the source data is missing or does not have the
    expected layout.

    External rigs (override carries the joint-index map) resolve from per-frame
    `pred_joint_coords` (rig-native Y-up -> flipped to camera Y-down, matching
    the pred_vertices convention). MHR reindexes the stored
    `pred_keypoints_{3d,2d}` via the MHR70 map.
    """
    map_key, w_key, mhr_map = _OPENPOSE_RENDER_MAPS[part]
    override = _get_skeleton_override(pose_data)
    ext_map = override.get(map_key) if override is not None else None

    if ext_map is not None:
        joints = person.get("pred_joint_coords")
        if joints is None:
            return None
        w = override.get(w_key)
        kp3d = resolve_openpose_keypoints_from_joints(
            np.asarray(joints, dtype=np.float32),
            np.asarray(ext_map, dtype=np.int64),
            None if w is None else np.asarray(w, dtype=np.float32),
        ).copy()  # copy: the sign flips below must not touch the stored joints
        kp3d[:, 1] *= -1.0  # rig-native Y-up -> camera Y-down
        kp3d[:, 2] *= -1.0
        if dim == 3:
            return kp3d
        cam_t = person.get("pred_cam_t")
        focal = person.get("focal_length")
        if cam_t is None or focal is None:
            return None
        pts3 = kp3d + np.asarray(cam_t, dtype=np.float32).reshape(1, 3)
        # Clamp depth so the perspective divide never hits zero/negative z.
        z = np.maximum(pts3[:, 2:3], 1e-6)
        # Only the first focal entry is used (scalar or first element).
        f = float(np.asarray(focal, dtype=np.float32).reshape(-1)[0])
        xy = pts3[:, :2] * f + np.array([W * 0.5, H * 0.5], dtype=np.float32)[None, :] * z
        return (xy / z).astype(np.float32)

    key = "pred_keypoints_3d" if dim == 3 else "pred_keypoints_2d"
    kp_full = person.get(key)
    if kp_full is None:
        return None
    kp_full = np.asarray(kp_full, dtype=np.float32)
    # Reject anything that is not an (N >= 70, dim) table. The column check
    # restores the `shape[1]` validation the capsule / 2D renderers did
    # themselves before they were switched to this shared provider.
    if kp_full.ndim != 2 or kp_full.shape[0] < 70 or kp_full.shape[1] != dim:
        return None
    return kp_full[mhr_map]
# MHR has no face bones — face deforms via expr_params morphs — so landmarks # are sourced from `pred_vertices` at fixed vertex IDs picked by NN against diff --git a/comfy_extras/sam3d_body/export/glb_skeletal.py b/comfy_extras/sam3d_body/export/glb_skeletal.py index 6204c1067..b3d3fdfa8 100644 --- a/comfy_extras/sam3d_body/export/glb_skeletal.py +++ b/comfy_extras/sam3d_body/export/glb_skeletal.py @@ -24,14 +24,12 @@ import numpy as np from .glb_shared import ( GLBWriter, + Rig, bake_vertex_colors, - bind_skel_state, bone_locals_from_globals, collect_tracks, - compact_skin_to_n, compute_normals, compute_pastel_mix, - extract_rig_static, flat_shade_mesh, gaussian_smooth_quats, global_skel_state_from_pose_data, @@ -41,7 +39,6 @@ from .glb_shared import ( quat_sign_fix_per_joint, rotation_align, unflip, - zero_pose_rest_verts, ) from comfy_extras.sam3d_body.utils import jet_colormap @@ -220,30 +217,22 @@ def build_glb_skeletal( if not tracks: raise ValueError("build_glb_skeletal: no valid tracks in pose_data") - rig_static = extract_rig_static(model, pose_data) - NJ = rig_static["num_joints"] - NV = rig_static["num_verts"] - NEXPR = rig_static["num_expr"] - parents = rig_static["parents"] - is_external = bool(rig_static.get("_external", False)) - if is_external: + rig = Rig.from_pose_data(pose_data, model) + NJ = rig.num_joints + NV = rig.num_verts + NEXPR = rig.num_expr + parents = rig.parents + if not rig.can_rerun_fk: # External rigs have no PCA pose params to re-run; only stored globals - # are available, and kimodo stores joint coords already Y-up. + # are available, and they store joint coords already Y-up. use_stored_global_rots = True - joint_coords_y_down = not is_external - # Compact sparse skinning to 8 influences per vertex into glTF's two - # JOINTS_*/WEIGHTS_* sets. MHR averages ~2.8 influences/vert but some - # shoulder/hip verts have 5-8 where multiple joints cancel — keeping only - # 4 there leaks per-bone rotation noise into the rendered mesh. 
- if is_external: - joints_8 = rig_static["lbs_compact_joints"] - weights_8 = rig_static["lbs_compact_weights"] - actual_max_inf = rig_static["lbs_compact_max_inf"] - else: - joints_8, weights_8, actual_max_inf = compact_skin_to_n( - rig_static["lbs_skin_indices"], rig_static["lbs_vert_indices"], - rig_static["lbs_skin_weights"], NV, max_inf=8, - ) + joint_coords_y_down = rig.per_frame_y_down + # Skinning is already compacted to ≤8 influences per vertex (MHR averages + # ~2.8 but some shoulder/hip verts hit 5-8; keeping only 4 there leaks + # per-bone rotation noise into the rendered mesh). + joints_8 = rig.lbs_joints + weights_8 = rig.lbs_weights + actual_max_inf = rig.lbs_max_inf joints_set0 = np.ascontiguousarray(joints_8[:, :4]) weights_set0 = np.ascontiguousarray(weights_8[:, :4]) use_set1 = actual_max_inf > 4 @@ -252,10 +241,8 @@ def build_glb_skeletal( # Derive bone locals from the rig's bind globals rather than recomputing # FK ourselves, so any mismatch between `parents` and the rig's actual FK # is absorbed into the local TRS instead of producing wrong globals. - bind_global_cm = bind_skel_state(model, pose_data) - bind_global_m = bind_global_cm.copy().astype(np.float32) - bind_global_m[:, :3] *= 0.01 - bind_local = bone_locals_from_globals(bind_global_m[None], rig_static["parents"])[0] + bind_global_m = rig.bind_global_m + bind_local = bone_locals_from_globals(bind_global_m[None], parents)[0] # IBP = inverse of bind global. With bone defaults set to bind_local and # FK composed via `parents`, skin_matrix at rest = identity. 
@@ -280,7 +267,7 @@ def build_glb_skeletal( expr_morph_accs: List[int] = [] if include_face_morphs and NEXPR > 0: - eb = rig_static["expr_basis"].astype(np.float32) * 0.01 + eb = rig.expr_basis.astype(np.float32) * 0.01 for e in range(NEXPR): expr_morph_accs.append(w.add_vec3_f32_no_minmax(eb[e])) @@ -329,16 +316,14 @@ def build_glb_skeletal( body_mesh_node_idx: Optional[int] = None if include_body: - # External rigs have no PCA shape — `zero_pose_rest_verts` short- - # circuits to `pose_data["_skeleton_override"]["rest_verts_m"]`, - # so zeroed shape_params is safe there. - if is_external: - shape_params_arr = np.zeros(0, dtype=np.float32) - else: - shape_params_arr = np.asarray( - frames[frame_indices[0]][person_k]["shape_params"], dtype=np.float32, - ) - rest_v = zero_pose_rest_verts(model, shape_params_arr, pose_data=pose_data) + # MHR rest verts depend on the subject's shape_params; external rigs + # ship fixed rest verts and ignore the arg (so the empty external + # `shape_params` is harmless). + shape_params_arr = np.asarray( + frames[frame_indices[0]][person_k].get("shape_params", []), + dtype=np.float32, + ) + rest_v = rig.rest_verts_m(shape_params_arr) normals = compute_normals(rest_v, faces_native) positions_acc = w.add_vec3_f32(rest_v) normals_acc = w.add_vec3_f32(normals) @@ -393,7 +378,7 @@ def build_glb_skeletal( color_idx_per_vert: Optional[np.ndarray] = None hw = float(bone_vis_radius_m) bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh( - bind_global_m[:, :3], rig_static["parents"], half_width_m=hw, + bind_global_m[:, :3], parents, half_width_m=hw, ) if bv_v.shape[0] > 0: F = bv_f.shape[0] @@ -458,7 +443,7 @@ def build_glb_skeletal( # local translation (t_local inherits parent sign via q_parent_inv) # and produces visible "axis resets" mid-animation. 
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7]) - bone_local_anim = bone_locals_from_globals(rig_global_m, rig_static["parents"]) + bone_local_anim = bone_locals_from_globals(rig_global_m, parents) local_t = bone_local_anim[..., :3].astype(np.float32) local_q = bone_local_anim[..., 3:7].astype(np.float32) local_s = bone_local_anim[..., 7].astype(np.float32) diff --git a/comfy_extras/sam3d_body/export/openpose_2d.py b/comfy_extras/sam3d_body/export/openpose_2d.py index 4656a9435..8f5c4ab81 100644 --- a/comfy_extras/sam3d_body/export/openpose_2d.py +++ b/comfy_extras/sam3d_body/export/openpose_2d.py @@ -19,10 +19,8 @@ from PIL import Image from comfy_extras.pose.keypoint_draw import KeypointDraw from .glb_shared import ( - OPENPOSE18_TO_MHR70, - OPENPOSE_HAND21_TO_MHR70_L, - OPENPOSE_HAND21_TO_MHR70_R, OPENPOSE_HAND_COLORS_21, + openpose_render_keypoints, select_face_landmark_vert_ids, ) @@ -53,32 +51,31 @@ def _project_face_landmarks_2d( def _pack_dwpose_134( - person: Dict[str, Any], *, include_body: bool, include_hands: bool, + person: Dict[str, Any], pose_data: Dict[str, Any], *, + include_body: bool, include_hands: bool, H: int, W: int, ) -> Tuple[np.ndarray, np.ndarray]: """Pack a SAM3D person dict into (kp, scores): (134, 2) DWPose-layout coords + (134,) confidence. Face slot (24-91) is left zeroed; face dots are drawn separately so SAM3D's 238-sapiens / rig-fallback counts work. - Non-finite or out-of-band entries get score=0 and are filtered downstream.""" + Non-finite or out-of-band entries get score=0 and are filtered downstream. 
+ + Keypoints come from the shared provider: MHR reindexes `pred_keypoints_2d`, + external rigs (Kimodo) resolve + project from `pred_joint_coords`.""" kp = np.zeros((134, 2), dtype=np.float32) scores = np.zeros(134, dtype=np.float32) - kp2d_full = person.get("pred_keypoints_2d") - if kp2d_full is None: - return kp, scores - kp2d = np.asarray(kp2d_full, dtype=np.float32) - if kp2d.ndim != 2 or kp2d.shape[1] != 2 or kp2d.shape[0] < 70: - return kp, scores - if include_body: - body_xy = kp2d[OPENPOSE18_TO_MHR70] - finite = np.isfinite(body_xy).all(axis=1) - kp[:18][finite] = body_xy[finite] - scores[:18][finite] = 1.0 + body_xy = openpose_render_keypoints(person, pose_data, "body", dim=2, H=H, W=W) + if body_xy is not None: + finite = np.isfinite(body_xy).all(axis=1) + kp[:18][finite] = body_xy[finite] + scores[:18][finite] = 1.0 if include_hands: - for slot_start, mhr_idx in ((92, OPENPOSE_HAND21_TO_MHR70_R), - (113, OPENPOSE_HAND21_TO_MHR70_L)): - hand_xy = kp2d[mhr_idx] + for slot_start, part in ((92, "hand_r"), (113, "hand_l")): + hand_xy = openpose_render_keypoints(person, pose_data, part, dim=2, H=H, W=W) + if hand_xy is None: + continue finite = np.isfinite(hand_xy).all(axis=1) kp[slot_start:slot_start + 21][finite] = hand_xy[finite] scores[slot_start:slot_start + 21][finite] = 1.0 @@ -190,7 +187,8 @@ def render_pose_data_openpose( pre = canvas.copy() if pastel > 0 else None kp134, scores134 = _pack_dwpose_134( - person, include_body=include_body, include_hands=include_hands, + person, pose_data, include_body=include_body, + include_hands=include_hands, H=H, W=W, ) _KD.draw_wholebody_keypoints( canvas, kp134, scores=scores134, threshold=0.5, diff --git a/comfy_extras/sam3d_body/utils.py b/comfy_extras/sam3d_body/utils.py index 40bdc6a97..dda3b6d04 100644 --- a/comfy_extras/sam3d_body/utils.py +++ b/comfy_extras/sam3d_body/utils.py @@ -178,6 +178,12 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str, center = np.array([W * 0.5, H 
* 0.5], dtype=np.float32) reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"} + # External rigs (e.g. Kimodo) store pred_joint_coords rig-native Y-up; the + # render openpose/scail keypoint provider resolves from them and flips Y/Z. + # Transform them through the camera too (in camera space, then back to Y-up) + # so those keypoints follow the override instead of staying in the old frame. + override = mhr_pose_data.get("_skeleton_override") + joints_y_up = override is not None and not bool(override.get("per_frame_y_down", False)) new_frames: List[List[Dict[str, Any]]] = [] for frame in mhr_pose_data["frames"]: scaled = [] @@ -197,6 +203,17 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str, if k in reproj: # re-project the new 3D to 2D image coords z = np.maximum(cam[..., 2:3], 1e-6) p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32) + jc = p.get("pred_joint_coords") + if jc is not None: + jc = np.asarray(jc, dtype=np.float32).copy() + if joints_y_up: + jc[..., 1] *= -1.0 + jc[..., 2] *= -1.0 + jc = (jc + cam_t - eye) @ R.T + if joints_y_up: + jc[..., 1] *= -1.0 + jc[..., 2] *= -1.0 + p["pred_joint_coords"] = jc.astype(np.float32) p["pred_cam_t"] = np.zeros(3, dtype=np.float32) p["focal_length"] = np.array(new_focal, dtype=np.float32) scaled.append(p)