mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-24 00:39:30 +08:00
1192 lines
52 KiB
Python
1192 lines
52 KiB
Python
"""GLB export — OpenPose 18-keypoint visualization mode.

Independent of the MHR rig — sourced from pose_data's `pred_keypoints_3d`
(the model's regressed surface keypoints). Each track becomes an armature
with a sibling joint per keypoint; sphere markers + stick/capsule limbs are
skinned to those joints.

Optional hand keypoints (also from `pred_keypoints_3d`, indices 21..62) and
face landmarks (sampled from `pred_vertices` at fixed head-mesh vertex IDs)
extend the same armature.

OpenPose-shared tables / palettes / mappings live in `glb_shared.py` and are
imported below — they're also used by the 2D and 3D renderers in this package.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
import numpy as np
|
||
|
||
from .glb_shared import (
|
||
DWPOSE_HAND_COLORS_21,
|
||
FACE_LANDMARK_COLORS,
|
||
FACE_LANDMARK_TARGETS,
|
||
GLBWriter,
|
||
OPENPOSE18_TO_MHR70,
|
||
OPENPOSE_18_NAMES,
|
||
OPENPOSE_18_PAIRS,
|
||
OPENPOSE_HAND21_NAMES,
|
||
OPENPOSE_HAND21_TO_MHR70_L,
|
||
OPENPOSE_HAND21_TO_MHR70_R,
|
||
OPENPOSE_HAND_COLORS_21,
|
||
OPENPOSE_HAND_PAIRS,
|
||
OPENPOSE_RAINBOW_18,
|
||
SCAIL_KEYPOINT_COLORS_18,
|
||
SCAIL_LIMB_COLORS_17,
|
||
collect_tracks,
|
||
flat_shade_mesh,
|
||
gaussian_smooth_positions,
|
||
make_lit_material,
|
||
quat_sign_fix_per_joint,
|
||
rotation_align,
|
||
rotmat_to_quat_np,
|
||
select_face_landmark_vert_ids,
|
||
smooth_shade_mesh,
|
||
unflip,
|
||
uv_sphere_unit,
|
||
)
|
||
|
||
|
||
def _finalize_skinned_mesh(
    verts: np.ndarray, faces: np.ndarray,
    joints: np.ndarray, weights: np.ndarray, vert_colors: np.ndarray,
    smooth_shade: bool,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Shade an indexed, skinned sphere/stick mesh and attach vertex colors.

    Args:
        verts, faces, joints, weights: indexed mesh plus skinning attributes,
            forwarded unchanged to `smooth_shade_mesh` / `flat_shade_mesh`.
        vert_colors: one color row per input vertex.
        smooth_shade: True keeps the indexed mesh and its per-vertex colors;
            False flat-shades and emits one color row per face corner.

    Returns:
        (verts, normals, faces, joints, weights, colors). The first five come
        straight from the shading helper; colors are float32.
    """
    if not smooth_shade:
        n_faces = faces.shape[0]
        # Snapshot the corner indices before the helper runs — presumably a
        # guard in case `flat_shade_mesh` rewrites `faces` in place.
        corner_ids = faces.copy()
        flat_v, flat_n, flat_f, flat_j, flat_w = flat_shade_mesh(
            verts, faces, joints, weights,
        )
        # Row 3*i + k of the output holds the color of corner k of face i.
        corner_colors = np.zeros((n_faces * 3, 3), dtype=np.float32)
        corner_colors.reshape(n_faces, 3, 3)[...] = vert_colors[corner_ids]
        return flat_v, flat_n, flat_f, flat_j, flat_w, corner_colors

    sm_v, sm_n, sm_f, sm_j, sm_w = smooth_shade_mesh(verts, faces, joints, weights)
    return sm_v, sm_n, sm_f, sm_j, sm_w, vert_colors.astype(np.float32)
|
||
|
||
|
||
def _pair_colors_from_kp(
|
||
pairs: Tuple[Tuple[int, int], ...], kp_colors: np.ndarray, endpoint: int = 1,
|
||
) -> np.ndarray:
|
||
"""Per-limb color = endpoint-vertex color from `kp_colors`. Default
|
||
`endpoint=1` picks the second (distal) vertex of each pair, which is
|
||
the OpenPose-canonical per-finger gradient when fingers go base→tip
|
||
(wrist=0 → thumb1=1 → thumb2=2 …)."""
|
||
n = len(pairs)
|
||
out = np.zeros((n, 3), dtype=np.float32)
|
||
for i, (a, b) in enumerate(pairs):
|
||
out[i] = kp_colors[b if endpoint == 1 else a]
|
||
return out
|
||
|
||
|
||
def _openpose_bind_at_rig_rest(
|
||
pose_data: Dict[str, Any], *,
|
||
include_hands: bool, face_vert_ids: Optional[np.ndarray],
|
||
) -> Optional[np.ndarray]:
|
||
"""OpenPose keypoint positions at the rig's REST pose (T-pose at authoring
|
||
origin), built from the `_skeleton_override`'s `bind_global_m` (joint rest
|
||
TRS) and `rest_verts_m` (mesh rest verts for face landmarks).
|
||
|
||
Used as the static-bind for openpose-mode geometry so the GLB's static
|
||
POSITION attribute sits at rig origin — matching skeletal mode's bind and
|
||
producing the same 'snap from rest to scene-frame-0' transition at the
|
||
start of playback. Without this, the static geometry is at scene-frame-0
|
||
(kp_seq[0]) and viewers that auto-fit on static POSITION will center on
|
||
the scene location, hiding the per-frame motion.
|
||
|
||
Returns None when the override is missing or doesn't carry all the needed
|
||
mappings — caller falls back to per-frame extraction (kp_seq[0])."""
|
||
override = pose_data.get("_skeleton_override") if isinstance(pose_data, dict) else None
|
||
if override is None or "bind_global_m" not in override:
|
||
return None
|
||
op18 = override.get("openpose18_joint_indices")
|
||
if op18 is None:
|
||
return None
|
||
rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3]
|
||
op18_w = override.get("openpose18_joint_weights")
|
||
parts: List[np.ndarray] = [
|
||
_resolve_openpose_keypoints_from_joints(
|
||
rest_pos, np.asarray(op18, dtype=np.int64),
|
||
weights=None if op18_w is None else np.asarray(op18_w, dtype=np.float32),
|
||
)
|
||
]
|
||
if include_hands:
|
||
op21_r = override.get("openpose_hand21_r_joint_indices")
|
||
op21_l = override.get("openpose_hand21_l_joint_indices")
|
||
if op21_r is None or op21_l is None:
|
||
return None
|
||
op21_r_w = override.get("openpose_hand21_r_joint_weights")
|
||
op21_l_w = override.get("openpose_hand21_l_joint_weights")
|
||
parts.append(_resolve_openpose_keypoints_from_joints(
|
||
rest_pos, np.asarray(op21_r, dtype=np.int64),
|
||
weights=None if op21_r_w is None else np.asarray(op21_r_w, dtype=np.float32),
|
||
))
|
||
parts.append(_resolve_openpose_keypoints_from_joints(
|
||
rest_pos, np.asarray(op21_l, dtype=np.int64),
|
||
weights=None if op21_l_w is None else np.asarray(op21_l_w, dtype=np.float32),
|
||
))
|
||
if face_vert_ids is not None:
|
||
rest_verts = override.get("rest_verts_m")
|
||
if rest_verts is None:
|
||
return None
|
||
parts.append(np.asarray(rest_verts, dtype=np.float32)[face_vert_ids])
|
||
return np.concatenate(parts, axis=0).astype(np.float32)
|
||
|
||
|
||
def _resolve_openpose_keypoints_from_joints(
|
||
joints: np.ndarray, mapping: np.ndarray,
|
||
weights: Optional[np.ndarray] = None,
|
||
) -> np.ndarray:
|
||
"""Resolve a `(K, 2)` joint-index → keypoint mapping against a per-frame
|
||
`(J, 3)` joint-position array.
|
||
|
||
Row `(a, b)` with `b == -1` uses `joints[a]` directly (any weight ignored).
|
||
Row `(a, b)` with `b >= 0` returns `w * joints[a] + (1 - w) * joints[b]`:
|
||
- default (weights=None): `w = 0.5` → plain midpoint, useful for
|
||
keypoints that genuinely lie between two joints (Nose ≈ midpoint of
|
||
eyes).
|
||
- explicit `w` outside [0, 1] EXTRAPOLATES past the line segment, which
|
||
is how we approximate landmarks that have no rig joint AND no
|
||
in-between joint pair (Ears ≈ RightEye + (RightEye − LeftEye), i.e.
|
||
`w_a = 2.0` along the eye→ear axis)."""
|
||
a = mapping[:, 0].astype(np.int64)
|
||
b = mapping[:, 1].astype(np.int64)
|
||
pos_a = joints[a]
|
||
has_b = b >= 0
|
||
if not has_b.any():
|
||
return pos_a.astype(np.float32, copy=False)
|
||
b_safe = np.where(has_b, b, a)
|
||
pos_b = joints[b_safe]
|
||
if weights is None:
|
||
w_a = np.where(has_b, 0.5, 1.0).astype(np.float32)
|
||
else:
|
||
w_a = np.where(has_b, np.asarray(weights, dtype=np.float32), 1.0)
|
||
w_b = (1.0 - w_a).astype(np.float32)
|
||
out = pos_a * w_a[:, None] + pos_b * w_b[:, None]
|
||
return out.astype(np.float32, copy=False)
|
||
|
||
|
||
def _extract_openpose_keypoints(
|
||
pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
|
||
) -> np.ndarray:
|
||
"""(N, 18, 3) OpenPose keypoint positions in rig-native Y-up metres.
|
||
|
||
Two sources, in priority order:
|
||
|
||
1. **External-skeleton path** — when pose_data has `_skeleton_override`
|
||
with `openpose18_joint_indices` ((18, 2) int32, see
|
||
`_resolve_openpose_keypoints_from_joints`), synthesize from each
|
||
person's `pred_joint_coords` directly. The override frame is already
|
||
rig-native Y-up, so no axis flip.
|
||
2. **MHR70 path** (default for SAM3DBody_Predict output) — re-index the
|
||
first 70 of 308 MHR keypoints (`pred_keypoints_3d`) to COCO-18.
|
||
Stored y-down (post `j3d[..., [1,2]] *= -1` in sam3d_body), so we
|
||
un-flip y/z to match rig-native Y-up.
|
||
"""
|
||
frames = pose_data["frames"]
|
||
N = len(frame_indices)
|
||
out = np.zeros((N, 18, 3), dtype=np.float32)
|
||
|
||
override = pose_data.get("_skeleton_override") if isinstance(pose_data, dict) else None
|
||
op18 = override.get("openpose18_joint_indices") if override is not None else None
|
||
if op18 is not None:
|
||
op18 = np.asarray(op18, dtype=np.int64)
|
||
if op18.ndim != 2 or op18.shape != (18, 2):
|
||
raise ValueError(
|
||
"build_glb_openpose: `openpose18_joint_indices` in "
|
||
"`_skeleton_override` must be shape (18, 2); got "
|
||
f"{tuple(op18.shape)}. Each row is (joint_a, joint_b); "
|
||
"use joint_b=-1 for single-joint keypoints."
|
||
)
|
||
op18_w = override.get("openpose18_joint_weights")
|
||
if op18_w is not None:
|
||
op18_w = np.asarray(op18_w, dtype=np.float32)
|
||
if op18_w.shape != (18,):
|
||
raise ValueError(
|
||
"build_glb_openpose: `openpose18_joint_weights` must be "
|
||
f"shape (18,); got {tuple(op18_w.shape)}."
|
||
)
|
||
for t_idx, t in enumerate(frame_indices):
|
||
person = frames[t][person_k]
|
||
if "pred_joint_coords" not in person:
|
||
raise ValueError(
|
||
"build_glb_openpose: external-skeleton path needs "
|
||
"per-frame `pred_joint_coords` (J, 3) on each person; "
|
||
f"missing at frame={t}, track={person_k}."
|
||
)
|
||
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
|
||
out[t_idx] = _resolve_openpose_keypoints_from_joints(
|
||
joints, op18, weights=op18_w,
|
||
)
|
||
return out
|
||
|
||
for t_idx, t in enumerate(frame_indices):
|
||
person = frames[t][person_k]
|
||
if "pred_keypoints_3d" not in person:
|
||
# Diagnose the source: external-skeleton producers ship
|
||
# `_skeleton_override` instead of MHR70 keypoints. If the
|
||
# producer didn't populate `openpose18_joint_indices` either,
|
||
# we can't synthesize the 18-keypoint set.
|
||
if override is not None:
|
||
raise ValueError(
|
||
"build_glb_openpose: this pose_data carries "
|
||
"`_skeleton_override` but it doesn't include "
|
||
"`openpose18_joint_indices` and the per-frame person "
|
||
"dict is missing `pred_keypoints_3d`. Ask the upstream "
|
||
"node to populate `openpose18_joint_indices` on the "
|
||
"override (a (18, 2) int32 mapping into its joint list), "
|
||
"or switch SAM3DBody_ToGLB to `skeletal` mode."
|
||
)
|
||
present_keys = sorted(person.keys())
|
||
raise ValueError(
|
||
"build_glb_openpose: pose_data is missing "
|
||
"`pred_keypoints_3d` (frame=%d, track=%d). Keys present "
|
||
"on this person: %s. Re-run SAM3DBody_Predict — older "
|
||
"saved pose_data may pre-date the field, and any "
|
||
"intermediate node that rebuilds person dicts must "
|
||
"preserve it."
|
||
% (t, person_k, present_keys)
|
||
)
|
||
kp = np.asarray(person["pred_keypoints_3d"], dtype=np.float32)
|
||
out[t_idx] = kp[OPENPOSE18_TO_MHR70]
|
||
out[..., 1] *= -1.0
|
||
out[..., 2] *= -1.0
|
||
return out
|
||
|
||
|
||
def _extract_openpose_hand_keypoints(
|
||
pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
|
||
) -> np.ndarray:
|
||
"""(N, 42, 3) right+left OpenPose hand keypoints (21 + 21) in rig-native
|
||
Y-up frame.
|
||
|
||
External-skeleton path: requires `openpose_hand21_r_joint_indices` AND
|
||
`openpose_hand21_l_joint_indices` ((21, 2) int32 each) in the override.
|
||
Resolved against per-frame `pred_joint_coords` like the body path.
|
||
|
||
MHR70 path: re-orders `pred_keypoints_3d` indices 21..62 to OpenPose-21
|
||
(wrist + 5 fingers, thumb→pinky, base→tip)."""
|
||
frames = pose_data["frames"]
|
||
N = len(frame_indices)
|
||
out = np.zeros((N, 42, 3), dtype=np.float32)
|
||
|
||
override = pose_data.get("_skeleton_override") if isinstance(pose_data, dict) else None
|
||
op21_r = override.get("openpose_hand21_r_joint_indices") if override is not None else None
|
||
op21_l = override.get("openpose_hand21_l_joint_indices") if override is not None else None
|
||
if override is not None and (op21_r is not None or op21_l is not None):
|
||
if op21_r is None or op21_l is None:
|
||
raise ValueError(
|
||
"build_glb_openpose: external skeleton must supply BOTH "
|
||
"`openpose_hand21_r_joint_indices` and "
|
||
"`openpose_hand21_l_joint_indices` for include_hands=True."
|
||
)
|
||
op21_r = np.asarray(op21_r, dtype=np.int64)
|
||
op21_l = np.asarray(op21_l, dtype=np.int64)
|
||
for arr, side in ((op21_r, "r"), (op21_l, "l")):
|
||
if arr.ndim != 2 or arr.shape != (21, 2):
|
||
raise ValueError(
|
||
f"build_glb_openpose: `openpose_hand21_{side}_joint_indices` "
|
||
f"must be shape (21, 2); got {tuple(arr.shape)}."
|
||
)
|
||
op21_r_w = override.get("openpose_hand21_r_joint_weights")
|
||
op21_l_w = override.get("openpose_hand21_l_joint_weights")
|
||
op21_r_w = (np.asarray(op21_r_w, dtype=np.float32)
|
||
if op21_r_w is not None else None)
|
||
op21_l_w = (np.asarray(op21_l_w, dtype=np.float32)
|
||
if op21_l_w is not None else None)
|
||
for t_idx, t in enumerate(frame_indices):
|
||
person = frames[t][person_k]
|
||
if "pred_joint_coords" not in person:
|
||
raise ValueError(
|
||
"build_glb_openpose: external-skeleton path needs "
|
||
"per-frame `pred_joint_coords` for hands."
|
||
)
|
||
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
|
||
out[t_idx, :21] = _resolve_openpose_keypoints_from_joints(
|
||
joints, op21_r, weights=op21_r_w,
|
||
)
|
||
out[t_idx, 21:] = _resolve_openpose_keypoints_from_joints(
|
||
joints, op21_l, weights=op21_l_w,
|
||
)
|
||
return out
|
||
|
||
for t_idx, t in enumerate(frame_indices):
|
||
person = frames[t][person_k]
|
||
if "pred_keypoints_3d" not in person:
|
||
if override is not None:
|
||
raise ValueError(
|
||
"build_glb_openpose: include_hands=True with an external "
|
||
"skeleton needs `openpose_hand21_r_joint_indices` and "
|
||
"`openpose_hand21_l_joint_indices` on `_skeleton_override`. "
|
||
"Disable hands or ask the upstream node to populate them."
|
||
)
|
||
raise ValueError(
|
||
"build_glb_openpose: pose_data is missing `pred_keypoints_3d`."
|
||
)
|
||
kp = np.asarray(person["pred_keypoints_3d"], dtype=np.float32)
|
||
out[t_idx, :21] = kp[OPENPOSE_HAND21_TO_MHR70_R]
|
||
out[t_idx, 21:] = kp[OPENPOSE_HAND21_TO_MHR70_L]
|
||
out[..., 1] *= -1.0
|
||
out[..., 2] *= -1.0
|
||
return out
|
||
|
||
|
||
def _extract_face_landmarks_from_verts(
|
||
pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
|
||
vert_ids: np.ndarray,
|
||
) -> np.ndarray:
|
||
"""(N, K_face, 3) face landmarks sampled from per-frame `pred_vertices`
|
||
at the supplied head-mesh vertex IDs, unflipped to MHR-native Y-up.
|
||
Each landmark inherits per-frame shape/expr/pose deformation for free
|
||
since `pred_vertices` already has it baked in."""
|
||
frames = pose_data["frames"]
|
||
N = len(frame_indices)
|
||
K = int(vert_ids.shape[0])
|
||
out = np.zeros((N, K, 3), dtype=np.float32)
|
||
for t_idx, t in enumerate(frame_indices):
|
||
person = frames[t][person_k]
|
||
if "pred_vertices" not in person:
|
||
raise ValueError(
|
||
"build_glb_openpose: face_source='rig' needs `pred_vertices` "
|
||
"on every frame — re-run Predict to populate it."
|
||
)
|
||
v = np.asarray(person["pred_vertices"], dtype=np.float32).reshape(-1, 3)
|
||
out[t_idx] = v[vert_ids]
|
||
out[..., 1] *= -1.0
|
||
out[..., 2] *= -1.0
|
||
return out
|
||
|
||
|
||
def _build_openpose_spheres(
    bind_kp_m: np.ndarray, radius_m: float, kp_colors: np.ndarray,
    base_joint_idx: int = 0,
    smooth_shade: bool = False,
    joint_indices: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Build one sphere marker per keypoint, rigidly skinned and vertex-colored.

    The unit sphere from `uv_sphere_unit()` is scaled by `radius_m` and
    translated to each row of `bind_kp_m`. Every vertex of sphere `j` gets
    weight 1.0 on a single joint and the color `kp_colors[j]`.

    Args:
        bind_kp_m: `(K, 3)` keypoint positions the spheres are centred on.
        radius_m: sphere radius.
        kp_colors: per-keypoint colors, indexed by sphere number.
        base_joint_idx: offset added to the sphere number to obtain its joint
            index, so a group can sit anywhere in a shared skin. Ignored when
            `joint_indices` is given.
        smooth_shade: forwarded to `_finalize_skinned_mesh`; True keeps the
            indexed mesh, False (default) flat-shades it.
        joint_indices: optional explicit joint index per sphere, overriding
            `base_joint_idx + j` (lets callers skip keypoints).

    Returns:
        `(verts, normals, faces, joints4, weights4, vert_colors)` as produced
        by `_finalize_skinned_mesh`.
    """
    unit_v, unit_f = uv_sphere_unit()
    n_kp = bind_kp_m.shape[0]
    nv = unit_v.shape[0]
    nf = unit_f.shape[0]

    # Decide the joint for every sphere once, instead of re-testing
    # `joint_indices` on each iteration.
    if joint_indices is not None:
        skin_joint = [int(joint_indices[j]) for j in range(n_kp)]
    else:
        skin_joint = [j + base_joint_idx for j in range(n_kp)]

    # No normals buffer is built here: normals come from the shading step in
    # `_finalize_skinned_mesh`, which does not take normals as an input.
    out_v = np.zeros((n_kp * nv, 3), dtype=np.float32)
    out_f = np.zeros((n_kp * nf, 3), dtype=np.uint32)
    out_j = np.zeros((n_kp * nv, 4), dtype=np.uint16)
    out_w = np.zeros((n_kp * nv, 4), dtype=np.float32)
    out_c = np.zeros((n_kp * nv, 3), dtype=np.float32)

    for j in range(n_kp):
        v_lo = j * nv
        v_hi = v_lo + nv
        out_v[v_lo:v_hi] = unit_v * radius_m + bind_kp_m[j]
        # Face indices are local to the unit sphere; shift into this sphere's
        # vertex range.
        out_f[j * nf:(j + 1) * nf] = unit_f + v_lo
        out_j[v_lo:v_hi, 0] = skin_joint[j]
        out_w[v_lo:v_hi, 0] = 1.0
        out_c[v_lo:v_hi] = kp_colors[j]

    return _finalize_skinned_mesh(out_v, out_f, out_j, out_w, out_c, smooth_shade)
|
||
|
||
|
||
def _capsule_mesh_local(
|
||
L: float, W: float, *,
|
||
n_cap_lat: Optional[int] = None,
|
||
n_body: Optional[int] = None,
|
||
n_lon: Optional[int] = None,
|
||
end_width_frac: float = 0.3,
|
||
shape: str = "ellipsoid",
|
||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||
"""Build a per-limb mesh in limb-local frame along +Y from y=0 (head
|
||
pole) to y=L (tail pole).
|
||
|
||
`shape` selects the silhouette:
|
||
- 'ellipsoid' (default): tips are small hemispheres of radius
|
||
`W * end_width_frac`; body has ellipsoidal radius profile
|
||
sin(π*u) from w_end at the junctions to W at the middle. Gives
|
||
a fat-middle / narrow-end stretched-ellipse look.
|
||
- 'capsule': SCAIL-style "rig" limb — an OPEN cylinder of constant
|
||
radius W with no hemisphere caps. Pair with sphere joint markers
|
||
of the same radius so the spheres seamlessly cap the open
|
||
cylinder ends (the cylinder cross-section ring at the endpoint
|
||
lies exactly on the sphere surface). Drawing hemisphere caps
|
||
inside the joint sphere creates a visible bump where the cap
|
||
pokes out unevenly when sphere radius ≠ cap radius — open
|
||
cylinders avoid that.
|
||
|
||
Per-limb mesh is required because the cap height (w_end) depends on
|
||
the limb width — a single canonical mesh can't produce true
|
||
hemispheres for arbitrary L:W ratios in ellipsoid mode.
|
||
|
||
Returns:
|
||
verts: (Nv, 3) float32 — limb-local positions in meters.
|
||
faces: (Nf, 3) uint32 — triangle indices.
|
||
weights: (Nv, 2) float32 — (head, tail) skinning weights, linearly
|
||
interpolated by axial position (sums to 1).
|
||
"""
|
||
W = max(1e-6, min(float(W), float(L) * 0.5 - 1e-6))
|
||
if str(shape) == "capsule":
|
||
# SCAIL-style "rig" limb: an OPEN cylinder of constant radius W,
|
||
# no hemisphere caps. The sphere joint markers at each endpoint
|
||
# provide the rounded ends of the bone — when sphere_radius ==
|
||
# cylinder_radius, the cylinder cross-section ring at the bone
|
||
# endpoint lies exactly on the sphere surface, so silhouette is
|
||
# seamless. Hemisphere caps would create a visible bump where
|
||
# the cap pokes out of the sphere if cap_r ≠ marker_r, so we
|
||
# omit them entirely.
|
||
cap_r = 0.0
|
||
body_r = W
|
||
if n_cap_lat is None:
|
||
n_cap_lat = 0
|
||
if n_body is None:
|
||
n_body = 0
|
||
if n_lon is None:
|
||
n_lon = 16
|
||
elif str(shape) == "ellipsoid":
|
||
end_frac = float(min(0.95, max(0.05, end_width_frac)))
|
||
cap_r = max(1e-7, W * end_frac)
|
||
body_r = W
|
||
# Ellipsoid defaults: more body rings to sample the sin(π·u) curve.
|
||
if n_cap_lat is None:
|
||
n_cap_lat = 3
|
||
if n_body is None:
|
||
n_body = 7
|
||
if n_lon is None:
|
||
n_lon = 12
|
||
else:
|
||
raise ValueError(
|
||
f"_capsule_mesh_local: unknown shape={shape!r} "
|
||
"(expected 'ellipsoid' or 'capsule')"
|
||
)
|
||
if 2.0 * cap_r >= L:
|
||
cap_r = max(0.0, L * 0.5 - 1e-6)
|
||
body_len = float(L) - 2.0 * cap_r
|
||
n_cap_lat = max(0, int(n_cap_lat))
|
||
n_body = max(0, int(n_body))
|
||
n_lon = max(3, int(n_lon))
|
||
|
||
has_caps = n_cap_lat > 0
|
||
|
||
verts: List[List[float]] = []
|
||
head_pole = -1
|
||
tail_pole = -1
|
||
head_rings: List[int] = []
|
||
tail_rings: List[int] = []
|
||
|
||
if has_caps:
|
||
# Head pole vertex at y=0 (south pole of head hemisphere).
|
||
head_pole = len(verts)
|
||
verts.append([0.0, 0.0, 0.0])
|
||
# Head cap rings (i = 1..n_cap_lat). Last ring (i=n_cap_lat,
|
||
# theta=π/2) is the head-body junction at y=cap_r, r=cap_r.
|
||
for i in range(1, n_cap_lat + 1):
|
||
theta = (np.pi * 0.5) * i / n_cap_lat
|
||
y = cap_r * (1.0 - np.cos(theta))
|
||
r = cap_r * np.sin(theta)
|
||
head_rings.append(len(verts))
|
||
for k in range(n_lon):
|
||
phi = 2.0 * np.pi * k / n_lon
|
||
verts.append([r * float(np.cos(phi)), float(y), r * float(np.sin(phi))])
|
||
else:
|
||
# Open cylinder: no caps, no pole. Add an end ring at y=0 directly.
|
||
head_rings.append(len(verts))
|
||
for k in range(n_lon):
|
||
phi = 2.0 * np.pi * k / n_lon
|
||
verts.append([body_r * float(np.cos(phi)), 0.0, body_r * float(np.sin(phi))])
|
||
|
||
# Body intermediate rings (between the cap junctions for capped meshes,
|
||
# between the two end rings for open cylinders). For 'capsule' mode
|
||
# n_body=0 by default — no intermediate rings needed for a constant-
|
||
# radius cylinder.
|
||
body_rings: List[int] = []
|
||
is_ellipsoid = str(shape) == "ellipsoid"
|
||
for j in range(1, n_body + 1):
|
||
u = j / (n_body + 1)
|
||
y = cap_r + body_len * u
|
||
if is_ellipsoid:
|
||
r = cap_r + (body_r - cap_r) * float(np.sin(np.pi * u))
|
||
else:
|
||
r = body_r
|
||
body_rings.append(len(verts))
|
||
for k in range(n_lon):
|
||
phi = 2.0 * np.pi * k / n_lon
|
||
verts.append([r * float(np.cos(phi)), float(y), r * float(np.sin(phi))])
|
||
|
||
if has_caps:
|
||
# Tail cap rings (i = 0..n_cap_lat-1). First ring (i=0, theta=π/2)
|
||
# is the body-tail junction at y=L-cap_r, r=cap_r; last
|
||
# (i=n_cap_lat-1) is the ring just before the pole.
|
||
for i in range(0, n_cap_lat):
|
||
theta = (np.pi * 0.5) * (n_cap_lat - i) / n_cap_lat
|
||
y = float(L) - cap_r * (1.0 - np.cos(theta))
|
||
r = cap_r * np.sin(theta)
|
||
tail_rings.append(len(verts))
|
||
for k in range(n_lon):
|
||
phi = 2.0 * np.pi * k / n_lon
|
||
verts.append([r * float(np.cos(phi)), float(y), r * float(np.sin(phi))])
|
||
tail_pole = len(verts)
|
||
verts.append([0.0, float(L), 0.0])
|
||
else:
|
||
# Open cylinder end ring at y=L.
|
||
tail_rings.append(len(verts))
|
||
for k in range(n_lon):
|
||
phi = 2.0 * np.pi * k / n_lon
|
||
verts.append([body_r * float(np.cos(phi)), float(L), body_r * float(np.sin(phi))])
|
||
|
||
faces: List[List[int]] = []
|
||
|
||
if has_caps:
|
||
# Head pole fan — outward (-Y) normal at the south pole.
|
||
r0 = head_rings[0]
|
||
for k in range(n_lon):
|
||
a = r0 + k
|
||
b = r0 + (k + 1) % n_lon
|
||
faces.append([head_pole, a, b])
|
||
|
||
# All inter-ring quads in axial order.
|
||
all_rings = head_rings + body_rings + tail_rings
|
||
for i in range(len(all_rings) - 1):
|
||
rl = all_rings[i]
|
||
rh = all_rings[i + 1]
|
||
for k in range(n_lon):
|
||
a = rl + k
|
||
b = rl + (k + 1) % n_lon
|
||
c = rh + (k + 1) % n_lon
|
||
d = rh + k
|
||
faces.append([a, c, b])
|
||
faces.append([a, d, c])
|
||
|
||
if has_caps:
|
||
# Tail pole fan — outward (+Y) normal at the north pole.
|
||
rL = tail_rings[-1]
|
||
for k in range(n_lon):
|
||
a = rL + k
|
||
b = rL + (k + 1) % n_lon
|
||
faces.append([tail_pole, b, a])
|
||
|
||
v_arr = np.asarray(verts, dtype=np.float32)
|
||
weights = np.zeros((v_arr.shape[0], 2), dtype=np.float32)
|
||
weights[:, 1] = np.clip(v_arr[:, 1] / max(float(L), 1e-12), 0.0, 1.0)
|
||
weights[:, 0] = 1.0 - weights[:, 1]
|
||
|
||
return v_arr, np.asarray(faces, dtype=np.uint32), weights
|
||
|
||
|
||
def _scail_redirect_neck_stub(body_kp: np.ndarray) -> np.ndarray:
|
||
"""Replace the nose keypoint (idx 0) of a (...,18,3) array with a short
|
||
neck stub (0.6 spine + 0.4 neck→nose), matching the capsule render."""
|
||
out = body_kp.copy()
|
||
neck = body_kp[..., 1, :]
|
||
nose = body_kp[..., 0, :]
|
||
mid_hip = 0.5 * (body_kp[..., 8, :] + body_kp[..., 11, :])
|
||
|
||
def _unit(v):
|
||
return v / np.linalg.norm(v, axis=-1, keepdims=True).clip(min=1e-6)
|
||
|
||
nose_vec = nose - neck
|
||
nose_len = np.linalg.norm(nose_vec, axis=-1, keepdims=True)
|
||
mixed = _unit(0.6 * _unit(neck - mid_hip) + 0.4 * _unit(nose_vec))
|
||
out[..., 0, :] = neck + mixed * (nose_len * 0.5)
|
||
return out
|
||
|
||
|
||
def _openpose_limb_rest_trs(
|
||
bind_kp_m: np.ndarray, pairs: Tuple[Tuple[int, int], ...],
|
||
) -> Tuple[np.ndarray, np.ndarray]:
|
||
"""Per-limb rest TRS:
|
||
midpoints (K_pairs, 3): rest midpoint between bind_kp_m[a] and bind_kp_m[b].
|
||
rest_axes (K_pairs, 3): unit direction a→b at rest (or +Y if degenerate).
|
||
Caller uses `midpoints` as each limb joint's rest translation (rotation =
|
||
identity), and `rest_axes` to compute per-frame alignment rotations."""
|
||
K_pairs = len(pairs)
|
||
mid = np.zeros((K_pairs, 3), dtype=np.float32)
|
||
axis = np.zeros((K_pairs, 3), dtype=np.float32)
|
||
axis[:, 1] = 1.0
|
||
for k, (a, b) in enumerate(pairs):
|
||
a_pos = bind_kp_m[a]
|
||
b_pos = bind_kp_m[b]
|
||
mid[k] = 0.5 * (a_pos + b_pos)
|
||
d = b_pos - a_pos
|
||
n = float(np.linalg.norm(d))
|
||
if n > 1e-9:
|
||
axis[k] = d / n
|
||
return mid, axis
|
||
|
||
|
||
def _openpose_limb_anim_trs(
    kp_seq: np.ndarray, pairs: Tuple[Tuple[int, int], ...], rest_axes: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute per-frame translation and rotation for every limb joint.

    Args:
        kp_seq: (N, K, 3) keypoint positions per frame.
        pairs: limb definitions as (a, b) keypoint-index pairs.
        rest_axes: (len(pairs), 3) rest direction of each limb.

    Returns:
        anim_mid: (N, len(pairs), 3) float32 — midpoint of the limb's two
            keypoints in each frame.
        anim_quat: (N, len(pairs), 4) float32 — `rotmat_to_quat_np` of the
            rotation `rotation_align(rest_axis, frame_axis)` (xyzw order per
            the original author's note). Frames where the limb is shorter
            than 1e-9 keep the identity rotation.

    Combined with the rest TRS this gives
    `skin_matrix(t) = T(mid_t) * R_t * T(-mid_rest)`: each limb mesh rotates
    rigidly about its rest midpoint instead of being stretched between two
    translating joints.
    """
    n_frames = kp_seq.shape[0]
    limb_count = len(pairs)
    anim_mid = np.zeros((n_frames, limb_count, 3), dtype=np.float32)
    # Identity everywhere; overwritten wherever the limb has a usable axis.
    rot = np.broadcast_to(
        np.eye(3, dtype=np.float32), (n_frames, limb_count, 3, 3),
    ).copy()

    for limb, (start, end) in enumerate(pairs):
        reference = rest_axes[limb]
        for frame in range(n_frames):
            p_start = kp_seq[frame, start]
            p_end = kp_seq[frame, end]
            anim_mid[frame, limb] = 0.5 * (p_start + p_end)
            delta = p_end - p_start
            span = float(np.linalg.norm(delta))
            if span > 1e-9:
                rot[frame, limb] = rotation_align(reference, delta / span)

    anim_quat = rotmat_to_quat_np(rot).astype(np.float32)
    return anim_mid, anim_quat
|
||
|
||
|
||
def _build_openpose_sticks(
|
||
bind_kp_m: np.ndarray, pairs: Tuple[Tuple[int, int], ...],
|
||
half_width_m: float, pair_colors: np.ndarray,
|
||
limb_joint_base_idx: int = 0,
|
||
shape: str = "ellipsoid",
|
||
smooth_shade: bool = False,
|
||
end_width_frac: float = 0.3,
|
||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||
"""Capsule (cylinder + hemispherical caps) per limb pair (a, b).
|
||
|
||
Each limb gets its own mesh sized to that limb's length and width so
|
||
the caps are TRUE hemispheres of radius `half_width_eff` — the limb
|
||
silhouette is rounded-rectangle-like, regardless of L:W ratio. Width
|
||
auto-clamped to `length * 0.1` so short limbs (face/ear) don't look
|
||
chunky next to long ones.
|
||
|
||
Skinning: rigid (weight=1) binding to a per-limb joint at
|
||
`limb_joint_base_idx + limb_idx` — the caller animates that joint with
|
||
midpoint translation + rest-to-current rotation so each capsule rotates
|
||
rigidly with its limb (avoids translation-only LBS cross-section
|
||
thinning). Returns flat-shaded (verts, normals, faces, joints4,
|
||
weights4, vert_colors)."""
|
||
canonical = np.array([0.0, 1.0, 0.0], dtype=np.float32)
|
||
|
||
out_v_chunks: List[np.ndarray] = []
|
||
out_f_chunks: List[np.ndarray] = []
|
||
out_j_chunks: List[np.ndarray] = []
|
||
out_w_chunks: List[np.ndarray] = []
|
||
out_c_chunks: List[np.ndarray] = []
|
||
v_total = 0
|
||
WIDTH_RATIO = 0.1
|
||
MIN_WIDTH = 0.001
|
||
is_capsule = str(shape) == "capsule"
|
||
for limb_idx, (a, b) in enumerate(pairs):
|
||
head = bind_kp_m[a]
|
||
tail = bind_kp_m[b]
|
||
direction = tail - head
|
||
length = float(np.linalg.norm(direction))
|
||
if length < 1e-6:
|
||
continue
|
||
unit_dir = direction / length
|
||
R = rotation_align(canonical, unit_dir)
|
||
if is_capsule:
|
||
# SCAIL-style uniform radius — every bone gets the same width.
|
||
# `_capsule_mesh_local` clamps internally to L/2-eps so very
|
||
# short bones don't go degenerate.
|
||
half_width_eff = max(MIN_WIDTH, half_width_m)
|
||
else:
|
||
# Ellipsoid mode: original auto-thinning so short face/ear
|
||
# limbs don't look chunky next to long body limbs.
|
||
half_width_eff = max(MIN_WIDTH, min(length * WIDTH_RATIO, half_width_m))
|
||
|
||
v_local, f_local, _weights_unused = _capsule_mesh_local(
|
||
length, half_width_eff, shape=shape, end_width_frac=end_width_frac,
|
||
)
|
||
v_world = v_local @ R.T + head
|
||
Nv = v_local.shape[0]
|
||
|
||
# Rigid binding to the per-limb joint. The 2-bone (head, tail) weights
|
||
# from `_capsule_mesh_local` are discarded — they're translation-only
|
||
# under glTF LBS and don't rotate the cross-section, causing visible
|
||
# thinning when the limb axis changes between rest and animated pose.
|
||
j_arr = np.zeros((Nv, 4), dtype=np.uint16)
|
||
j_arr[:, 0] = limb_idx + limb_joint_base_idx
|
||
w_arr = np.zeros((Nv, 4), dtype=np.float32)
|
||
w_arr[:, 0] = 1.0
|
||
c_arr = np.tile(pair_colors[limb_idx], (Nv, 1)).astype(np.float32)
|
||
|
||
out_v_chunks.append(v_world)
|
||
out_f_chunks.append(f_local + v_total)
|
||
out_j_chunks.append(j_arr)
|
||
out_w_chunks.append(w_arr)
|
||
out_c_chunks.append(c_arr)
|
||
v_total += Nv
|
||
|
||
if not out_v_chunks:
|
||
return (np.zeros((0, 3), dtype=np.float32), np.zeros((0, 3), dtype=np.float32),
|
||
np.zeros((0, 3), dtype=np.uint32), np.zeros((0, 4), dtype=np.uint16),
|
||
np.zeros((0, 4), dtype=np.float32), np.zeros((0, 3), dtype=np.float32))
|
||
|
||
verts = np.concatenate(out_v_chunks, axis=0)
|
||
faces = np.concatenate(out_f_chunks, axis=0)
|
||
joints = np.concatenate(out_j_chunks, axis=0)
|
||
weights = np.concatenate(out_w_chunks, axis=0)
|
||
colors = np.concatenate(out_c_chunks, axis=0)
|
||
return _finalize_skinned_mesh(verts, faces, joints, weights, colors, smooth_shade)
|
||
|
||
|
||
def _openpose_bake_or_animate(
    nodes: List[dict],
    samplers: List[dict],
    channels: List[dict],
    time_acc: Any,
    node_idx: int,
    path: str,
    values: np.ndarray,
    add_accessor: Any,
) -> None:
    """Bake a constant per-frame track into its node, or emit an animation channel.

    `values` is indexed by frame along axis 0. When every component varies by
    less than 1e-6 across frames, frame 0 is written straight into
    `nodes[node_idx][path]` so constant tracks don't bloat the animation
    buffer. Otherwise `add_accessor(values)` is called and one LINEAR
    sampler + channel pair targeting `(node_idx, path)` is appended.
    """
    if (np.ptp(values, axis=0) < 1e-6).all():
        nodes[node_idx][path] = values[0].tolist()
        return
    output_acc = add_accessor(values)
    samplers.append({"input": time_acc, "output": output_acc,
                     "interpolation": "LINEAR"})
    channels.append({
        "sampler": len(samplers) - 1,
        "target": {"node": node_idx, "path": path},
    })


def build_glb_openpose(
    pose_data: Dict[str, Any],
    *,
    fps: float = 24.0,
    camera_translation: str = "off",
    track_index: int = -1,
    marker_radius_m: float = 0.025,
    stick_radius_m: float = 0.008,
    include_hands: bool = False,
    hand_marker_radius_m: float = 0.0,
    hand_stick_radius_m: float = 0.0,
    hand_color_style: str = "dwpose",
    face_style: str = "disabled",
    face_marker_radius_m: float = 0.0,
    palette: str = "openpose",
    shape: str = "ellipsoid",
    smooth_shade: bool = False,
    material_roughness: float = 0.85,
    material_double_sided: bool = False,
    stick_end_width_frac: float = 0.6,
    bone_smooth_window: int = 0,
) -> bytes:
    """Build a GLB containing an OpenPose-style 3D skeleton — sphere markers
    per keypoint plus rainbow-colored sticks between standard limb pairs.
    Body keypoints are sourced from pose_data's `pred_keypoints_3d` (no rig
    forward needed). Optional hand keypoints (also from `pred_keypoints_3d`)
    and face landmarks (sampled from `pred_vertices` at fixed head-mesh
    vertex IDs) extend the same per-track armature.

    Args:
        pose_data: pose dictionary handed to `collect_tracks` and the
            keypoint extractors. This function itself reads
            `pose_data["canonical_colors"]` (when a face style is enabled)
            and `pose_data["frames"][t][person]["pred_cam_t"]` (when
            `camera_translation` is not 'off').
        fps: playback rate; keyframe times are `frame_index / fps` seconds.
            Must be a positive finite number.
        camera_translation: 'off' (default) leaves the track root static.
            'centered' animates the root with `pred_cam_t` relative to the
            track's first frame. Any other value animates the root with
            `pred_cam_t` as-is (after `unflip`).
        track_index: forwarded to `collect_tracks` to choose which
            track(s) are exported.
        marker_radius_m: body keypoint sphere radius.
        stick_radius_m: body limb half-width.
        include_hands: append the standard 21+21 OpenPose hand keypoints to
            each track's armature (right hand at MHR70 indices 21..41,
            left at 42..62).
        hand_marker_radius_m: per-hand sphere radius. 0 = auto = 0.4 ×
            `marker_radius_m` (hand keypoints are anatomically smaller than
            body joints; matches DWPose's smaller hand dots).
        hand_stick_radius_m: per-hand limb half-width. 0 = auto = 0.5 ×
            `stick_radius_m`.
        hand_color_style: 'dwpose' (default) = solid-blue hand dots,
            rainbow per-finger sticks (controlnet_aux/dwpose convention);
            'openpose' = rainbow per-finger dots AND sticks (matches
            poseParameters.cpp::HAND_COLORS_RENDER).
        face_style: 'disabled' (default) | 'full' | 'eyes_mouth' — face
            landmarks sampled from `pred_vertices` at vertex IDs picked from
            `pose_data["canonical_colors"]["positions"]`. 'full' = all ~30
            contour points; 'eyes_mouth' = the eyes + outer-lip subset.
        face_marker_radius_m: per-face landmark sphere radius. 0 = auto =
            0.3 × `marker_radius_m` — face landmarks are densely packed
            around the eyes/mouth/jaw and need to be much smaller than
            body keypoints to keep the layout legible. Face landmarks are
            rendered as standalone dots (no contour lines), matching
            DWPose's face_pose draw style.
        palette: body color scheme. 'openpose' = standard rainbow gradient
            per keypoint (canonical OpenPose convention); 'scail' =
            SCAIL-Pose style — warm hues right side, cool hues left side,
            grey neck-to-nose centerline, distinct per-limb colors.
        shape: limb mesh shape, forwarded to `_build_openpose_sticks`.
        smooth_shade: forwarded to the sphere and stick builders to select
            smooth vs. flat shading.
        material_roughness: forwarded to `make_lit_material`.
        material_double_sided: forwarded to `make_lit_material`.
        stick_end_width_frac: forwarded to `_build_openpose_sticks` as
            `end_width_frac`.
        bone_smooth_window: when > 1, the stacked keypoint sequence is
            passed through `gaussian_smooth_positions` with this window
            before binding and animation.

    Returns:
        The serialized GLB as produced by `GLBWriter.to_bytes`.

    Raises:
        ValueError: if `fps` is not a positive finite number; if `palette`,
            `hand_color_style` or `face_style` is not one of the accepted
            values; if a face style is requested but
            `pose_data['canonical_colors']['positions']` is missing; or if
            no track yields any frames.
    """
    # Reject unusable frame rates up front: zero / negative / non-finite fps
    # would silently produce inf/NaN or non-increasing keyframe times, which
    # makes the exported animation invalid.
    fps_value = float(fps)
    if not np.isfinite(fps_value) or fps_value <= 0.0:
        raise ValueError(
            f"build_glb_openpose: fps must be a positive finite number, "
            f"got {fps!r}"
        )

    is_scail = str(palette) == "scail"
    # SCAIL drops the face bones (13..16) and eye/ear spheres; keeps nose (idx 0,
    # the neck-stub tip) to cap the open cylinder. Matches the capsule render.
    body_pairs = OPENPOSE_18_PAIRS[:13] if is_scail else OPENPOSE_18_PAIRS
    body_sphere_kp = (np.arange(14, dtype=np.int64)
                      if is_scail else np.arange(18, dtype=np.int64))
    if is_scail:
        body_sphere_colors = SCAIL_KEYPOINT_COLORS_18
        body_stick_colors = SCAIL_LIMB_COLORS_17
    elif str(palette) == "openpose":
        # Existing OpenPose behavior: same rainbow array used for both
        # spheres (per-keypoint) and sticks (per-limb, indexed 0..16 of
        # the 18-element rainbow — yields a legible per-limb gradient).
        body_sphere_colors = OPENPOSE_RAINBOW_18
        body_stick_colors = OPENPOSE_RAINBOW_18
    else:
        raise ValueError(
            f"build_glb_openpose: unknown palette={palette!r} "
            "(expected 'openpose' or 'scail')"
        )

    # A radius of 0 (or below) means "auto": derive it from the body sizes.
    if float(hand_marker_radius_m) <= 0.0:
        hand_marker_radius_m = float(marker_radius_m) * 0.4
    if float(hand_stick_radius_m) <= 0.0:
        hand_stick_radius_m = float(stick_radius_m) * 0.5
    if float(face_marker_radius_m) <= 0.0:
        face_marker_radius_m = float(marker_radius_m) * 0.3

    if hand_color_style == "dwpose":
        hand_sphere_colors = DWPOSE_HAND_COLORS_21
    elif hand_color_style == "openpose":
        hand_sphere_colors = OPENPOSE_HAND_COLORS_21
    else:
        raise ValueError(
            f"build_glb_openpose: unknown hand_color_style="
            f"{hand_color_style!r} (expected 'dwpose' or 'openpose')"
        )

    tracks = collect_tracks(pose_data, track_index)
    if not tracks:
        raise ValueError("build_glb_openpose: no valid tracks in pose_data")

    # Eyes (6..13) + outer-lip ring (19..22) from FACE_LANDMARK_TARGETS.
    _EYES_MOUTH_IDX = np.array([6, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22], dtype=np.int64)
    face_vert_ids: Optional[np.ndarray] = None
    face_target_idx = np.arange(len(FACE_LANDMARK_TARGETS), dtype=np.int64)
    if face_style in ("full", "eyes_mouth"):
        canonical_colors = pose_data.get("canonical_colors") or {}
        positions = canonical_colors.get("positions")
        if positions is None:
            raise ValueError(
                "build_glb_openpose: face_style needs "
                "pose_data['canonical_colors']['positions'] (computed at "
                "model load and attached by Predict). Ensure the SAM3DBody "
                "Loader+Predict ran upstream of this node."
            )
        if face_style == "eyes_mouth":
            face_target_idx = _EYES_MOUTH_IDX
        face_vert_ids = select_face_landmark_vert_ids(
            np.asarray(positions),
            face_mask=canonical_colors.get("face_mask"),
        )[face_target_idx]
    elif face_style != "disabled":
        raise ValueError(
            f"build_glb_openpose: unknown face_style={face_style!r} "
            "(expected 'disabled', 'full', or 'eyes_mouth')"
        )

    K_body = 18
    K_hands = 42 if include_hands else 0
    K_face = int(face_vert_ids.shape[0]) if face_vert_ids is not None else 0
    K = K_body + K_hands + K_face
    # Keypoint-joint offsets of the two hands: right first, then left.
    hand_offsets = (K_body, K_body + 21)

    # Limb counts: one joint per stick pair. Limb joints carry translation +
    # rotation so each capsule rotates rigidly with its limb (no LBS thinning).
    K_body_limbs = len(body_pairs)
    K_hand_limbs = len(OPENPOSE_HAND_PAIRS) if include_hands else 0
    K_limbs = K_body_limbs + 2 * K_hand_limbs  # face has no sticks

    # Joint name list mirrors the keypoint stacking order: body → hands → face.
    joint_names: List[str] = [f"openpose_{n}" for n in OPENPOSE_18_NAMES]
    if include_hands:
        joint_names.extend([f"openpose_R_{n}" for n in OPENPOSE_HAND21_NAMES])
        joint_names.extend([f"openpose_L_{n}" for n in OPENPOSE_HAND21_NAMES])
    if K_face > 0:
        joint_names.extend([f"openpose_face_{FACE_LANDMARK_TARGETS[i][0]}"
                            for i in face_target_idx])

    # Limb joint names, stacked body → R-hand → L-hand to match the limb
    # joint ordering in skin.joints (after the K keypoint joints).
    limb_names: List[str] = [
        f"openpose_limb_{OPENPOSE_18_NAMES[a]}_{OPENPOSE_18_NAMES[b]}"
        for (a, b) in body_pairs
    ]
    if include_hands:
        for side in ("R", "L"):
            for (a, b) in OPENPOSE_HAND_PAIRS:
                limb_names.append(
                    f"openpose_{side}hand_limb_"
                    f"{OPENPOSE_HAND21_NAMES[a]}_{OPENPOSE_HAND21_NAMES[b]}"
                )

    w = GLBWriter()
    nodes: List[dict] = []
    meshes: List[dict] = []
    skins: List[dict] = []
    materials: List[dict] = []
    animations: List[dict] = []
    scene_root_indices: List[int] = []

    # Samplers/channels are shared by all tracks and end up in one animation.
    samplers: List[dict] = []
    channels: List[dict] = []
    for track_i, (person_k, frame_indices) in enumerate(tracks):
        body_seq = _extract_openpose_keypoints(pose_data, frame_indices, person_k)
        n_frames = body_seq.shape[0]
        if n_frames == 0:
            continue

        seq_chunks: List[np.ndarray] = [body_seq]
        if include_hands:
            seq_chunks.append(_extract_openpose_hand_keypoints(
                pose_data, frame_indices, person_k))
        if face_vert_ids is not None:
            seq_chunks.append(_extract_face_landmarks_from_verts(
                pose_data, frame_indices, person_k, face_vert_ids))
        kp_seq = np.concatenate(seq_chunks, axis=1)  # (N, K, 3)
        if bone_smooth_window and bone_smooth_window > 1:
            kp_seq = gaussian_smooth_positions(kp_seq, int(bone_smooth_window))

        # Static-bind = rig's REST pose when available (override path); else
        # fall back to frame 0 of the motion. The rest-pose bind makes the
        # GLB's static POSITION attribute sit at rig origin, so viewers
        # auto-fit/center on rig origin and the animation visibly snaps from
        # rest to scene-frame-0 — matching skeletal mode's behavior. Without
        # this, openpose's static geometry is at scene-frame-0 and viewers
        # mis-center on the scene location, masking the motion entirely.
        bind_kp_m_rest = _openpose_bind_at_rig_rest(
            pose_data, include_hands=include_hands, face_vert_ids=face_vert_ids,
        )
        bind_kp_m = (bind_kp_m_rest if bind_kp_m_rest is not None
                     else kp_seq[0].astype(np.float32))

        if is_scail:  # nose → neck stub, matching the capsule render
            kp_seq[:, :K_body] = _scail_redirect_neck_stub(kp_seq[:, :K_body])
            bind_kp_m[:K_body] = _scail_redirect_neck_stub(bind_kp_m[:K_body])

        person_root: Dict[str, Any] = {"name": f"track{track_i:02d}", "children": []}
        nodes.append(person_root)
        person_root_idx = len(nodes) - 1
        scene_root_indices.append(person_root_idx)

        # K keypoint joint nodes (spheres bind here, rigid translation only).
        joint_node_indices: List[int] = []
        for j in range(K):
            nodes.append({
                "name": joint_names[j],
                "translation": bind_kp_m[j].tolist(),
                "rotation": [0.0, 0.0, 0.0, 1.0],
                "scale": [1.0, 1.0, 1.0],
            })
            joint_node_indices.append(len(nodes) - 1)
        person_root["children"].extend(joint_node_indices)

        # Per-limb REST TRS (midpoint + axis) and per-frame TRS (midpoint +
        # quaternion that aligns rest-axis → frame-t-axis). Sticks bind
        # rigidly to these joints so each capsule rotates with its limb.
        limb_rest_mids_list: List[np.ndarray] = []
        limb_rest_axes_list: List[np.ndarray] = []
        limb_anim_mids_list: List[np.ndarray] = []
        limb_anim_quats_list: List[np.ndarray] = []
        rmid_b, raxis_b = _openpose_limb_rest_trs(bind_kp_m[:K_body], body_pairs)
        amid_b, aquat_b = _openpose_limb_anim_trs(kp_seq[:, :K_body], body_pairs, raxis_b)
        limb_rest_mids_list.append(rmid_b)
        limb_rest_axes_list.append(raxis_b)
        limb_anim_mids_list.append(amid_b)
        limb_anim_quats_list.append(aquat_b)
        if include_hands:
            for h_off in hand_offsets:
                rmid_h, raxis_h = _openpose_limb_rest_trs(
                    bind_kp_m[h_off:h_off + 21], OPENPOSE_HAND_PAIRS,
                )
                amid_h, aquat_h = _openpose_limb_anim_trs(
                    kp_seq[:, h_off:h_off + 21], OPENPOSE_HAND_PAIRS, raxis_h,
                )
                limb_rest_mids_list.append(rmid_h)
                limb_rest_axes_list.append(raxis_h)
                limb_anim_mids_list.append(amid_h)
                limb_anim_quats_list.append(aquat_h)
        limb_rest_mids = np.concatenate(limb_rest_mids_list, axis=0)     # (K_limbs, 3)
        limb_anim_mids = np.concatenate(limb_anim_mids_list, axis=1)     # (N, K_limbs, 3)
        limb_anim_quats = np.concatenate(limb_anim_quats_list, axis=1)   # (N, K_limbs, 4)
        # Hemisphere-align consecutive quats per limb so LINEAR interpolation
        # takes the short path (otherwise large per-frame rotations can flip
        # signs and produce visible "twist back" artifacts mid-playback).
        limb_anim_quats = quat_sign_fix_per_joint(limb_anim_quats).astype(np.float32)

        limb_joint_indices: List[int] = []
        for k in range(K_limbs):
            nodes.append({
                "name": limb_names[k],
                "translation": limb_rest_mids[k].tolist(),
                "rotation": [0.0, 0.0, 0.0, 1.0],
                "scale": [1.0, 1.0, 1.0],
            })
            limb_joint_indices.append(len(nodes) - 1)
        person_root["children"].extend(limb_joint_indices)

        # Combined skin: keypoint joints (IBM = T(-bind_kp_m)) then limb joints
        # (IBM = T(-limb_rest_mid)). Both yield identity skin_matrix at rest.
        all_joint_indices = joint_node_indices + limb_joint_indices
        ibm = np.tile(np.eye(4, dtype=np.float32), (K + K_limbs, 1, 1))
        ibm[:K, :3, 3] = -bind_kp_m
        if K_limbs > 0:
            ibm[K:K + K_limbs, :3, 3] = -limb_rest_mids
        # Each 4x4 is transposed before being handed to the writer.
        ibm_acc = w.add_mat4_f32(ibm.transpose(0, 2, 1).astype(np.float32))
        skins.append({
            "joints": all_joint_indices,
            "inverseBindMatrices": ibm_acc,
            "skeleton": person_root_idx,
        })
        skin_idx = len(skins) - 1

        # Per-group geometry. Spheres bind to keypoint joints (base_joint_idx
        # ∈ [0, K)); sticks bind to limb joints (limb_joint_base_idx ∈
        # [K, K + K_limbs)). Groups stack body → right hand → left hand →
        # face for keypoint joints, and body → R-hand → L-hand for limbs.
        group_meshes: List[Tuple[np.ndarray, np.ndarray, np.ndarray,
                                 np.ndarray, np.ndarray, np.ndarray]] = []
        sp = _build_openpose_spheres(
            bind_kp_m[body_sphere_kp], float(marker_radius_m),
            body_sphere_colors[body_sphere_kp], base_joint_idx=0,
            smooth_shade=smooth_shade,
            joint_indices=body_sphere_kp,
        )
        st = _build_openpose_sticks(
            bind_kp_m[:K_body], body_pairs, float(stick_radius_m),
            body_stick_colors, limb_joint_base_idx=K,  # body limbs start at K
            shape=shape,
            smooth_shade=smooth_shade,
            end_width_frac=stick_end_width_frac,
        )
        group_meshes.append(sp)
        group_meshes.append(st)

        if include_hands:
            # Hand stick colors stay rainbow per-finger regardless of
            # `hand_color_style` — only the sphere dots switch to solid
            # blue under 'dwpose'. Matches controlnet_aux/dwpose/util.py.
            hand_pair_colors = _pair_colors_from_kp(
                OPENPOSE_HAND_PAIRS, OPENPOSE_HAND_COLORS_21, endpoint=1,
            )
            for hand_i, h_off in enumerate(hand_offsets):  # right, then left
                h_bind = bind_kp_m[h_off:h_off + 21]
                group_meshes.append(_build_openpose_spheres(
                    h_bind, float(hand_marker_radius_m),
                    hand_sphere_colors, base_joint_idx=h_off,
                    smooth_shade=smooth_shade,
                ))
                group_meshes.append(_build_openpose_sticks(
                    h_bind, OPENPOSE_HAND_PAIRS, float(hand_stick_radius_m),
                    hand_pair_colors,
                    limb_joint_base_idx=K + K_body_limbs + hand_i * K_hand_limbs,
                    shape=shape,
                    smooth_shade=smooth_shade,
                    end_width_frac=stick_end_width_frac,
                ))

        if K_face > 0:
            f_off = K_body + K_hands
            f_bind = bind_kp_m[f_off:f_off + K_face]
            # DWPose face = dots only, no contour lines
            # (controlnet_aux/dwpose/util.py::draw_facepose draws white
            # circles per landmark and never connects them).
            # NOTE(review): the full FACE_LANDMARK_COLORS table is passed even
            # when 'eyes_mouth' selects only a subset of landmarks — confirm
            # the sphere builder / color table tolerate that.
            group_meshes.append(_build_openpose_spheres(
                f_bind, float(face_marker_radius_m),
                FACE_LANDMARK_COLORS, base_joint_idx=f_off,
                smooth_shade=smooth_shade,
            ))

        # One primitive (with its own material) per non-empty geometry group.
        primitives: List[dict] = []
        for (v_arr, n_arr, f_arr, j_arr, w_arr, c_arr) in group_meshes:
            if v_arr.shape[0] == 0:
                continue
            attrs = {
                "POSITION": w.add_vec3_f32(v_arr),
                "NORMAL": w.add_vec3_f32(n_arr),
                "JOINTS_0": w.add_joints_u16(j_arr),
                "WEIGHTS_0": w.add_weights_f32(w_arr),
                "COLOR_0": w.add_vec3_f32(c_arr),
            }
            materials.append(make_lit_material(
                roughness=material_roughness,
                double_sided=material_double_sided,
            ))
            primitives.append({
                "attributes": attrs,
                "indices": w.add_indices_u32(f_arr.reshape(-1)),
                "mode": 4,
                "material": len(materials) - 1,
            })
        if not primitives:
            continue
        meshes.append({"primitives": primitives})
        nodes.append({
            "name": f"track{track_i:02d}_openpose",
            "mesh": len(meshes) - 1,
            "skin": skin_idx,
        })
        person_root["children"].append(len(nodes) - 1)

        # Keyframe times in seconds; shared by every channel of this track.
        times = np.asarray(frame_indices, dtype=np.float32) / fps_value
        time_acc = w.add_scalar_f32(times)

        # Per-keypoint-joint translation channels (constant ones are baked).
        for j in range(K):
            _openpose_bake_or_animate(
                nodes, samplers, channels, time_acc,
                joint_node_indices[j], "translation",
                kp_seq[:, j, :].astype(np.float32),
                w.add_vec3_f32_anim,
            )

        # Per-limb-joint translation + rotation channels. Stationary limbs
        # have their constant TRS baked into the node so they don't bloat the
        # animation buffer.
        for k in range(K_limbs):
            limb_node_idx = limb_joint_indices[k]
            _openpose_bake_or_animate(
                nodes, samplers, channels, time_acc,
                limb_node_idx, "translation",
                limb_anim_mids[:, k, :].astype(np.float32),
                w.add_vec3_f32_anim,
            )
            # ptp on the absolute value would handle the +q == -q ambiguity,
            # but `quat_sign_fix_per_joint` already aligned signs so the plain
            # ptp used by the helper is fine here.
            _openpose_bake_or_animate(
                nodes, samplers, channels, time_acc,
                limb_node_idx, "rotation",
                limb_anim_quats[:, k, :].astype(np.float32),
                w.add_vec4_f32,
            )

        if camera_translation != "off":
            frames = pose_data["frames"]
            cam_t = np.stack([
                unflip(np.asarray(frames[t][person_k]["pred_cam_t"], dtype=np.float32))
                for t in frame_indices
            ], axis=0)
            if camera_translation == "centered" and cam_t.shape[0] > 0:
                # Express the camera track relative to its first frame.
                cam_t = cam_t - cam_t[0:1]
            _openpose_bake_or_animate(
                nodes, samplers, channels, time_acc,
                person_root_idx, "translation", cam_t,
                w.add_vec3_f32_anim,
            )

    if samplers:
        animations.append({
            "name": "all_tracks",
            "samplers": samplers, "channels": channels,
        })

    if not scene_root_indices:
        raise ValueError("build_glb_openpose: produced no tracks")

    gltf: Dict[str, Any] = {
        "asset": {"version": "2.0", "generator": "ComfyUI-SAM3DBody"},
        "scene": 0,
        "scenes": [{"nodes": scene_root_indices}],
        "nodes": nodes,
        "meshes": meshes,
        "skins": skins,
    }
    # Optional top-level arrays are only emitted when non-empty.
    if materials:
        gltf["materials"] = materials
    if animations:
        gltf["animations"] = animations
    return w.to_bytes(gltf)
|