ComfyUI/comfy_extras/sam3d_body/export/glb_openpose.py
2026-06-16 20:47:15 +03:00

1048 lines
44 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""GLB export — OpenPose 18-keypoint visualization mode.
Sourced from pose_data's `pred_keypoints_3d`, independent of the MHR rig. Each
track becomes an armature with a joint per keypoint; sphere markers and limbs
are skinned to those joints. Optional hands (`pred_keypoints_3d` 21..62) and
face landmarks (`pred_vertices` at fixed vertex IDs) extend the same armature.
Shared tables/palettes/mappings live in `glb_shared.py`.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
from .glb_shared import (
DWPOSE_HAND_COLORS_21,
FACE_LANDMARK_COLORS,
FACE_LANDMARK_TARGETS,
GLBWriter,
OPENPOSE18_TO_MHR70,
OPENPOSE_18_NAMES,
OPENPOSE_18_PAIRS,
OPENPOSE_HAND21_NAMES,
OPENPOSE_HAND21_TO_MHR70_L,
OPENPOSE_HAND21_TO_MHR70_R,
OPENPOSE_HAND_COLORS_21,
OPENPOSE_HAND_PAIRS,
OPENPOSE_RAINBOW_18,
SCAIL_KEYPOINT_COLORS_18,
SCAIL_LIMB_COLORS_17,
collect_tracks,
flat_shade_mesh,
gaussian_smooth_positions,
make_lit_material,
quat_sign_fix_per_joint,
resolve_openpose_keypoints_from_joints,
rotation_align,
rotmat_to_quat_np,
select_face_landmark_vert_ids,
smooth_shade_mesh,
unflip,
uv_sphere_unit,
)
def _finalize_skinned_mesh(
    verts: np.ndarray, faces: np.ndarray,
    joints: np.ndarray, weights: np.ndarray, vert_colors: np.ndarray,
    smooth_shade: bool,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Run a skinned group mesh through the chosen shader and attach colors.

    Smooth: the mesh goes through `smooth_shade_mesh` and the per-vertex
    colors are returned as-is (cast to float32).
    Flat: the mesh goes through `flat_shade_mesh` and the colors are
    re-gathered per face corner, producing `3 * F` rows where row `3*f + k`
    is the color of corner `k` of input face `f`.

    Returns (verts, normals, faces, joints, weights, colors).
    """
    if not smooth_shade:
        n_faces = faces.shape[0]
        # Snapshot the indexed faces before shading so the gather below uses
        # the original vertex ids (presumably a guard against the shader
        # touching `faces` in place).
        corner_ids = faces.copy()
        flat_v, flat_n, flat_f, flat_j, flat_w = flat_shade_mesh(
            verts, faces, joints, weights,
        )
        corner_colors = np.zeros((n_faces * 3, 3), dtype=np.float32)
        # View the buffer as (face, corner, rgb) and fill it in one gather.
        corner_colors.reshape(n_faces, 3, 3)[...] = vert_colors[corner_ids]
        return flat_v, flat_n, flat_f, flat_j, flat_w, corner_colors

    sm_v, sm_n, sm_f, sm_j, sm_w = smooth_shade_mesh(verts, faces, joints, weights)
    return sm_v, sm_n, sm_f, sm_j, sm_w, vert_colors.astype(np.float32)
def _pair_colors_from_kp(
    pairs: Tuple[Tuple[int, int], ...], kp_colors: np.ndarray, endpoint: int = 1,
) -> np.ndarray:
    """Look up one color per limb pair from a per-keypoint color table.

    `endpoint=1` (default) takes the color of the second keypoint of each pair
    (the distal one for base→tip finger pairs); any other value takes the
    first. Returns a float32 array of shape (len(pairs), 3).
    """
    limb_colors = np.zeros((len(pairs), 3), dtype=np.float32)
    use_second = endpoint == 1
    # Iterating the 2-D array yields writable row views, one per pair.
    for row, (first_kp, second_kp) in zip(limb_colors, pairs):
        row[...] = kp_colors[second_kp if use_second else first_kp]
    return limb_colors
def _openpose_bind_at_rig_rest(
    pose_data: Dict[str, Any], *,
    include_hands: bool, face_vert_ids: Optional[np.ndarray],
) -> Optional[np.ndarray]:
    """OpenPose keypoint positions at the rig's REST pose, or None.

    Built from `_skeleton_override`: joint rest positions are the first three
    columns of `bind_global_m`, resolved through the `openpose18_joint_indices`
    mapping (plus the two hand mappings when `include_hands`), and face
    landmarks are rows of `rest_verts_m` picked by `face_vert_ids`. Parts are
    stacked body → right hand → left hand → face and returned as float32.

    Returns None whenever a required piece is missing (no override, no
    `bind_global_m`, no body mapping, a missing hand mapping, or no
    `rest_verts_m` when face ids are given) so the caller can fall back to
    per-frame extraction.
    """
    if not isinstance(pose_data, dict):
        return None
    override = pose_data.get("_skeleton_override")
    if override is None or "bind_global_m" not in override:
        return None
    body_map = override.get("openpose18_joint_indices")
    if body_map is None:
        return None

    rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3]

    def _resolve(index_map: Any, weights_key: str) -> np.ndarray:
        # Optional per-keypoint blend weights live under a sibling key.
        raw_weights = override.get(weights_key)
        return resolve_openpose_keypoints_from_joints(
            rest_pos, np.asarray(index_map, dtype=np.int64),
            weights=None if raw_weights is None else np.asarray(raw_weights, dtype=np.float32),
        )

    parts: List[np.ndarray] = [_resolve(body_map, "openpose18_joint_weights")]

    if include_hands:
        right_map = override.get("openpose_hand21_r_joint_indices")
        left_map = override.get("openpose_hand21_l_joint_indices")
        if right_map is None or left_map is None:
            return None
        parts.append(_resolve(right_map, "openpose_hand21_r_joint_weights"))
        parts.append(_resolve(left_map, "openpose_hand21_l_joint_weights"))

    if face_vert_ids is not None:
        rest_verts = override.get("rest_verts_m")
        if rest_verts is None:
            return None
        parts.append(np.asarray(rest_verts, dtype=np.float32)[face_vert_ids])

    return np.concatenate(parts, axis=0).astype(np.float32)
def _extract_openpose_keypoints(
    pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
) -> np.ndarray:
    """(N, 18, 3) float32 OpenPose body keypoints for one track.

    MHR70 path (no `openpose18_joint_indices` on the override): each frame's
    `pred_keypoints_3d` is re-indexed with `OPENPOSE18_TO_MHR70`, then the y
    and z components of the whole result are negated.

    External-skeleton path (`_skeleton_override` carries
    `openpose18_joint_indices`, shape (18, 2), optionally with
    `openpose18_joint_weights`, shape (18,)): each frame's `pred_joint_coords`
    is resolved through `resolve_openpose_keypoints_from_joints`; no axis
    flip is applied on this path.

    Raises:
        ValueError: on a mis-shaped mapping/weights array or when a frame
            lacks the per-person field the active path needs.
    """
    frames = pose_data["frames"]
    keypoints = np.zeros((len(frame_indices), 18, 3), dtype=np.float32)
    override = pose_data.get("_skeleton_override") if isinstance(pose_data, dict) else None
    joint_map = None if override is None else override.get("openpose18_joint_indices")

    if joint_map is None:
        # MHR70 path.
        for slot, frame_no in enumerate(frame_indices):
            person = frames[frame_no][person_k]
            if "pred_keypoints_3d" in person:
                mhr_kp = np.asarray(person["pred_keypoints_3d"], dtype=np.float32)
                keypoints[slot] = mhr_kp[OPENPOSE18_TO_MHR70]
                continue
            if override is not None:
                # External-skeleton producer without `openpose18_joint_indices`:
                # can't synthesize the 18-keypoint set.
                raise ValueError(
                    "build_glb_openpose: this pose_data carries "
                    "`_skeleton_override` but it doesn't include "
                    "`openpose18_joint_indices` and the per-frame person "
                    "dict is missing `pred_keypoints_3d`. Ask the upstream "
                    "node to populate `openpose18_joint_indices` on the "
                    "override (a (18, 2) int32 mapping into its joint list), "
                    "or switch SAM3DBody_ToGLB to `skeletal` mode."
                )
            present_keys = sorted(person.keys())
            raise ValueError(
                "build_glb_openpose: pose_data is missing "
                "`pred_keypoints_3d` (frame=%d, track=%d). Keys present "
                "on this person: %s. Re-run SAM3DBody_Predict — older "
                "saved pose_data may pre-date the field, and any "
                "intermediate node that rebuilds person dicts must "
                "preserve it."
                % (frame_no, person_k, present_keys)
            )
        # Negate y and z in one pass (last axis is x, y, z).
        keypoints[..., 1:] *= -1.0
        return keypoints

    # External-skeleton path.
    joint_map = np.asarray(joint_map, dtype=np.int64)
    if joint_map.shape != (18, 2):
        raise ValueError(
            "build_glb_openpose: `openpose18_joint_indices` in "
            "`_skeleton_override` must be shape (18, 2); got "
            f"{tuple(joint_map.shape)}. Each row is (joint_a, joint_b); "
            "use joint_b=-1 for single-joint keypoints."
        )
    joint_weights = override.get("openpose18_joint_weights")
    if joint_weights is not None:
        joint_weights = np.asarray(joint_weights, dtype=np.float32)
        if joint_weights.shape != (18,):
            raise ValueError(
                "build_glb_openpose: `openpose18_joint_weights` must be "
                f"shape (18,); got {tuple(joint_weights.shape)}."
            )
    for slot, frame_no in enumerate(frame_indices):
        person = frames[frame_no][person_k]
        if "pred_joint_coords" not in person:
            raise ValueError(
                "build_glb_openpose: external-skeleton path needs "
                "per-frame `pred_joint_coords` (J, 3) on each person; "
                f"missing at frame={frame_no}, track={person_k}."
            )
        rig_joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
        keypoints[slot] = resolve_openpose_keypoints_from_joints(
            rig_joints, joint_map, weights=joint_weights,
        )
    return keypoints
def _extract_openpose_hand_keypoints(
    pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
) -> np.ndarray:
    """(N, 42, 3) float32 OpenPose hand keypoints: right hand in rows 0..20,
    left hand in rows 21..41.

    External-skeleton path: taken when `_skeleton_override` carries either
    `openpose_hand21_{r,l}_joint_indices`. Both must then be present with
    shape (21, 2); the optional `openpose_hand21_{r,l}_joint_weights` must be
    shape (21,). Each frame's `pred_joint_coords` is resolved through
    `resolve_openpose_keypoints_from_joints`; no axis flip is applied.

    MHR70 path: each frame's `pred_keypoints_3d` is re-indexed with
    `OPENPOSE_HAND21_TO_MHR70_R` / `_L`, then y and z of the whole result are
    negated.

    Raises:
        ValueError: when only one hand mapping is supplied, a mapping or
            weights array has the wrong shape, or a frame lacks the per-person
            field the active path needs.
    """
    frames = pose_data["frames"]
    out = np.zeros((len(frame_indices), 42, 3), dtype=np.float32)
    override = pose_data.get("_skeleton_override") if isinstance(pose_data, dict) else None
    op21_r = override.get("openpose_hand21_r_joint_indices") if override is not None else None
    op21_l = override.get("openpose_hand21_l_joint_indices") if override is not None else None

    if op21_r is not None or op21_l is not None:
        if op21_r is None or op21_l is None:
            raise ValueError(
                "build_glb_openpose: external skeleton must supply BOTH "
                "`openpose_hand21_r_joint_indices` and "
                "`openpose_hand21_l_joint_indices` for include_hands=True."
            )
        op21_r = np.asarray(op21_r, dtype=np.int64)
        op21_l = np.asarray(op21_l, dtype=np.int64)
        for arr, side in ((op21_r, "r"), (op21_l, "l")):
            if arr.shape != (21, 2):
                raise ValueError(
                    f"build_glb_openpose: `openpose_hand21_{side}_joint_indices` "
                    f"must be shape (21, 2); got {tuple(arr.shape)}."
                )
        # Validate the optional blend weights up front, mirroring the (18,)
        # check the body extractor applies to `openpose18_joint_weights`, so a
        # bad array fails here with a clear message instead of per frame
        # inside the resolver.
        side_weights: Dict[str, Optional[np.ndarray]] = {}
        for side in ("r", "l"):
            raw_w = override.get(f"openpose_hand21_{side}_joint_weights")
            if raw_w is None:
                side_weights[side] = None
                continue
            w_arr = np.asarray(raw_w, dtype=np.float32)
            if w_arr.shape != (21,):
                raise ValueError(
                    f"build_glb_openpose: `openpose_hand21_{side}_joint_weights` "
                    f"must be shape (21,); got {tuple(w_arr.shape)}."
                )
            side_weights[side] = w_arr
        for t_idx, t in enumerate(frame_indices):
            person = frames[t][person_k]
            if "pred_joint_coords" not in person:
                raise ValueError(
                    "build_glb_openpose: external-skeleton path needs "
                    "per-frame `pred_joint_coords` for hands."
                )
            joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
            out[t_idx, :21] = resolve_openpose_keypoints_from_joints(
                joints, op21_r, weights=side_weights["r"],
            )
            out[t_idx, 21:] = resolve_openpose_keypoints_from_joints(
                joints, op21_l, weights=side_weights["l"],
            )
        return out

    # MHR70 path.
    for t_idx, t in enumerate(frame_indices):
        person = frames[t][person_k]
        if "pred_keypoints_3d" not in person:
            if override is not None:
                raise ValueError(
                    "build_glb_openpose: include_hands=True with an external "
                    "skeleton needs `openpose_hand21_r_joint_indices` and "
                    "`openpose_hand21_l_joint_indices` on `_skeleton_override`. "
                    "Disable hands or ask the upstream node to populate them."
                )
            raise ValueError(
                "build_glb_openpose: pose_data is missing `pred_keypoints_3d`."
            )
        kp = np.asarray(person["pred_keypoints_3d"], dtype=np.float32)
        out[t_idx, :21] = kp[OPENPOSE_HAND21_TO_MHR70_R]
        out[t_idx, 21:] = kp[OPENPOSE_HAND21_TO_MHR70_L]
    # Negate y and z of every keypoint (last axis is x, y, z).
    out[..., 1] *= -1.0
    out[..., 2] *= -1.0
    return out
def _extract_face_landmarks_from_verts(
    pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
    vert_ids: np.ndarray,
) -> np.ndarray:
    """(N, K_face, 3) float32 face landmarks for one track.

    For each listed frame, `pred_vertices` is viewed as (-1, 3) and sampled at
    `vert_ids`; afterwards y and z of the whole result are negated.

    Raises:
        ValueError: if a frame's person dict has no `pred_vertices`.
    """
    frames = pose_data["frames"]
    n_landmarks = int(vert_ids.shape[0])
    landmarks = np.zeros((len(frame_indices), n_landmarks, 3), dtype=np.float32)
    for slot, frame_no in enumerate(frame_indices):
        person = frames[frame_no][person_k]
        if "pred_vertices" not in person:
            raise ValueError(
                "build_glb_openpose: face_source='rig' needs `pred_vertices` "
                "on every frame — re-run Predict to populate it."
            )
        mesh_verts = np.asarray(person["pred_vertices"], dtype=np.float32).reshape(-1, 3)
        landmarks[slot] = mesh_verts[vert_ids]
    # Last axis is (x, y, z): flip y and z together.
    landmarks[..., 1:] *= -1.0
    return landmarks
def _build_openpose_spheres(
    bind_kp_m: np.ndarray, radius_m: float, kp_colors: np.ndarray,
    base_joint_idx: int = 0,
    smooth_shade: bool = False,
    joint_indices: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """One sphere per keypoint, rigidly skinned to that keypoint's joint.

    The unit sphere from `uv_sphere_unit()` is scaled by `radius_m` and
    translated to each row of `bind_kp_m`. Every sphere vertex gets weight 1
    on a single joint and the color `kp_colors[j]` of its keypoint.

    Args:
        bind_kp_m: (K, 3) keypoint positions the spheres are centred on.
        radius_m: sphere radius.
        kp_colors: per-keypoint colors, indexed by keypoint row.
        base_joint_idx: offset added to the keypoint row to get its joint
            index (used when `joint_indices` is None).
        smooth_shade: forwarded to `_finalize_skinned_mesh`.
        joint_indices: optional explicit joint index per sphere; overrides
            `base_joint_idx` so callers can bind a subset of keypoints.

    Returns:
        (verts, normals, faces, joints4, weights4, vert_colors) as produced by
        `_finalize_skinned_mesh`.
    """
    sphere_v, sphere_f = uv_sphere_unit()
    n_kp = bind_kp_m.shape[0]
    n_sv = sphere_v.shape[0]
    n_sf = sphere_f.shape[0]

    # Buffers are laid out (keypoint, element, component) and flattened at the
    # end. No normals buffer is built here: normals come from the shading pass
    # inside `_finalize_skinned_mesh`, which does not accept input normals.
    verts = np.zeros((n_kp, n_sv, 3), dtype=np.float32)
    faces = np.zeros((n_kp, n_sf, 3), dtype=np.uint32)
    joints = np.zeros((n_kp, n_sv, 4), dtype=np.uint16)
    weights = np.zeros((n_kp, n_sv, 4), dtype=np.float32)
    colors = np.zeros((n_kp, n_sv, 3), dtype=np.float32)

    # Rigid binding: full weight on the first influence slot.
    weights[:, :, 0] = 1.0
    for kp_i in range(n_kp):
        verts[kp_i] = sphere_v * radius_m + bind_kp_m[kp_i]
        # Shift the template face indices to this sphere's vertex range.
        faces[kp_i] = sphere_f + kp_i * n_sv
        joints[kp_i, :, 0] = (
            int(joint_indices[kp_i]) if joint_indices is not None
            else kp_i + base_joint_idx
        )
        colors[kp_i] = kp_colors[kp_i]

    return _finalize_skinned_mesh(
        verts.reshape(n_kp * n_sv, 3),
        faces.reshape(n_kp * n_sf, 3),
        joints.reshape(n_kp * n_sv, 4),
        weights.reshape(n_kp * n_sv, 4),
        colors.reshape(n_kp * n_sv, 3),
        smooth_shade,
    )
def _capsule_mesh_local(
    L: float, W: float, *,
    n_cap_lat: Optional[int] = None,
    n_body: Optional[int] = None,
    n_lon: Optional[int] = None,
    end_width_frac: float = 0.3,
    shape: str = "ellipsoid",
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Build one limb mesh in limb-local space, running along +Y from y=0
    (head) to y=L (tail).

    `shape`:
      - 'ellipsoid' (default): hemispherical end caps of radius
        `W * end_width_frac` (fraction clamped to [0.05, 0.95]) joined by body
        rings whose radius follows cap_r + (W - cap_r)·sin(π·u).
        Default tessellation: 3 cap latitudes, 7 body rings, 12 longitudes.
      - 'capsule': an OPEN cylinder of constant radius W with no caps (the
        caller's sphere markers are meant to cover the ends).
        Default tessellation: 0 cap latitudes, 0 body rings, 16 longitudes.

    W is clamped to [1e-6, L/2 - 1e-6] before use. Explicit `n_cap_lat`,
    `n_body`, `n_lon` override the per-shape defaults; `n_lon` is at least 3
    and caps are emitted only when `n_cap_lat > 0`.

    Returns:
        verts (Nv, 3) float32, faces (Nf, 3) uint32, and weights (Nv, 2)
        float32 giving head/tail influence from y/L (each row sums to 1).

    Raises:
        ValueError: for an unknown `shape`.
    """
    W = max(1e-6, min(float(W), float(L) * 0.5 - 1e-6))
    shape_name = str(shape)
    if shape_name == "capsule":
        # Open cylinder, no caps — sphere markers cap the ends.
        cap_r = 0.0
        default_cap_lat, default_body, default_lon = 0, 0, 16
    elif shape_name == "ellipsoid":
        end_frac = float(min(0.95, max(0.05, end_width_frac)))
        cap_r = max(1e-7, W * end_frac)
        # More body rings to sample the sin(π·u) curve.
        default_cap_lat, default_body, default_lon = 3, 7, 12
    else:
        raise ValueError(
            f"_capsule_mesh_local: unknown shape={shape!r} "
            "(expected 'ellipsoid' or 'capsule')"
        )
    body_r = W
    if n_cap_lat is None:
        n_cap_lat = default_cap_lat
    if n_body is None:
        n_body = default_body
    if n_lon is None:
        n_lon = default_lon

    # Caps may never eat the whole limb length.
    if 2.0 * cap_r >= L:
        cap_r = max(0.0, L * 0.5 - 1e-6)
    body_len = float(L) - 2.0 * cap_r
    n_cap_lat = max(0, int(n_cap_lat))
    n_body = max(0, int(n_body))
    n_lon = max(3, int(n_lon))
    has_caps = n_cap_lat > 0
    is_ellipsoid = shape_name == "ellipsoid"

    # cos/sin of each longitude, computed once and reused by every ring.
    unit_circle = []
    for k in range(n_lon):
        phi = 2.0 * np.pi * k / n_lon
        unit_circle.append((float(np.cos(phi)), float(np.sin(phi))))

    verts: List[List[float]] = []

    def _emit_ring(radius: float, height: float) -> int:
        """Append one ring of `n_lon` vertices; return its first vertex id."""
        first = len(verts)
        for cos_phi, sin_phi in unit_circle:
            verts.append([radius * cos_phi, height, radius * sin_phi])
        return first

    head_pole = -1
    tail_pole = -1
    head_rings: List[int] = []
    body_rings: List[int] = []
    tail_rings: List[int] = []

    # --- head end ---------------------------------------------------------
    if has_caps:
        # Pole at y=0, then rings i = 1..n_cap_lat; the last ring (theta=π/2)
        # is the head-body junction at y=cap_r, r=cap_r.
        head_pole = len(verts)
        verts.append([0.0, 0.0, 0.0])
        for i in range(1, n_cap_lat + 1):
            theta = (np.pi * 0.5) * i / n_cap_lat
            ring_y = cap_r * (1.0 - np.cos(theta))
            ring_r = cap_r * np.sin(theta)
            head_rings.append(_emit_ring(ring_r, float(ring_y)))
    else:
        # Open end: a single ring at y=0, no pole.
        head_rings.append(_emit_ring(body_r, 0.0))

    # --- body rings, strictly between the two junctions ---------------------
    for j in range(1, n_body + 1):
        u = j / (n_body + 1)
        ring_y = cap_r + body_len * u
        if is_ellipsoid:
            ring_r = cap_r + (body_r - cap_r) * float(np.sin(np.pi * u))
        else:
            ring_r = body_r
        body_rings.append(_emit_ring(ring_r, float(ring_y)))

    # --- tail end -----------------------------------------------------------
    if has_caps:
        # Rings i = 0..n_cap_lat-1 walk from the body-tail junction
        # (theta=π/2, y=L-cap_r) towards the pole at y=L.
        for i in range(n_cap_lat):
            theta = (np.pi * 0.5) * (n_cap_lat - i) / n_cap_lat
            ring_y = float(L) - cap_r * (1.0 - np.cos(theta))
            ring_r = cap_r * np.sin(theta)
            tail_rings.append(_emit_ring(ring_r, float(ring_y)))
        tail_pole = len(verts)
        verts.append([0.0, float(L), 0.0])
    else:
        tail_rings.append(_emit_ring(body_r, float(L)))

    # --- faces --------------------------------------------------------------
    next_k = [(k + 1) % n_lon for k in range(n_lon)]
    faces: List[List[int]] = []
    if has_caps:
        # Fan around the head pole.
        first_ring = head_rings[0]
        for k in range(n_lon):
            faces.append([head_pole, first_ring + k, first_ring + next_k[k]])
    # Two triangles per quad between every pair of axially adjacent rings.
    ring_order = head_rings + body_rings + tail_rings
    for lower, upper in zip(ring_order, ring_order[1:]):
        for k in range(n_lon):
            a = lower + k
            b = lower + next_k[k]
            c = upper + next_k[k]
            d = upper + k
            faces.append([a, c, b])
            faces.append([a, d, c])
    if has_caps:
        # Fan around the tail pole (opposite winding to the head fan).
        last_ring = tail_rings[-1]
        for k in range(n_lon):
            faces.append([tail_pole, last_ring + next_k[k], last_ring + k])

    v_arr = np.asarray(verts, dtype=np.float32)
    # Tail weight grows linearly with height; head weight is the remainder.
    weights = np.zeros((v_arr.shape[0], 2), dtype=np.float32)
    weights[:, 1] = np.clip(v_arr[:, 1] / max(float(L), 1e-12), 0.0, 1.0)
    weights[:, 0] = 1.0 - weights[:, 1]
    return v_arr, np.asarray(faces, dtype=np.uint32), weights
def _scail_redirect_neck_stub(body_kp: np.ndarray) -> np.ndarray:
    """Return a copy of a (..., 18, 3) keypoint array whose nose (index 0) is
    replaced by a short neck stub.

    The stub starts at the neck (index 1), points along the normalized blend
    0.6·unit(neck − mid-hip) + 0.4·unit(nose − neck), with mid-hip the mean of
    indices 8 and 11, and is half as long as the original neck→nose segment.
    The input array is not modified.
    """
    def _normalized(vec: np.ndarray) -> np.ndarray:
        # Floor the length at 1e-6 so near-zero vectors don't divide by zero.
        length = np.linalg.norm(vec, axis=-1, keepdims=True)
        return vec / length.clip(min=1e-6)

    neck = body_kp[..., 1, :]
    to_nose = body_kp[..., 0, :] - neck
    hip_centre = 0.5 * (body_kp[..., 8, :] + body_kp[..., 11, :])

    stub_dir = _normalized(0.6 * _normalized(neck - hip_centre) + 0.4 * _normalized(to_nose))
    stub_len = np.linalg.norm(to_nose, axis=-1, keepdims=True) * 0.5

    redirected = body_kp.copy()
    redirected[..., 0, :] = neck + stub_dir * stub_len
    return redirected
def _openpose_limb_rest_trs(
    bind_kp_m: np.ndarray, pairs: Tuple[Tuple[int, int], ...],
) -> Tuple[np.ndarray, np.ndarray]:
    """Rest-pose midpoint and direction for every limb pair.

    Returns `(midpoints, axes)`, both float32 of shape (len(pairs), 3):
    the midpoint of keypoints a and b, and the unit vector from a to b.
    Pairs whose endpoints are at most 1e-9 apart keep the default +Y axis.
    """
    n_pairs = len(pairs)
    midpoints = np.zeros((n_pairs, 3), dtype=np.float32)
    # Every axis starts as +Y; only non-degenerate limbs overwrite it.
    axes = np.tile(np.array([0.0, 1.0, 0.0], dtype=np.float32), (n_pairs, 1))
    for row, (head_i, tail_i) in enumerate(pairs):
        head = bind_kp_m[head_i]
        tail = bind_kp_m[tail_i]
        midpoints[row] = 0.5 * (head + tail)
        span = tail - head
        span_len = float(np.linalg.norm(span))
        if span_len > 1e-9:
            axes[row] = span / span_len
    return midpoints, axes
def _openpose_limb_anim_trs(
    kp_seq: np.ndarray, pairs: Tuple[Tuple[int, int], ...], rest_axes: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
    """Per-frame midpoint and rotation for every limb pair.

    Returns `(anim_mid, anim_quat)`: float32 midpoints of shape
    (N, K_pairs, 3) and float32 quaternions of shape (N, K_pairs, 4), xyzw,
    obtained by converting, via `rotmat_to_quat_np`, the matrices that
    `rotation_align` gives for turning each limb's rest axis onto its
    direction at frame t. A limb whose endpoints are at most 1e-9 apart in a
    frame keeps the identity rotation for that frame.

    Intended to drive skin_matrix(t) = T(mid_t)·R_t·T(-mid_rest): a rigid
    rotation about the rest midpoint, with no LBS cross-section thinning.
    """
    n_frames = kp_seq.shape[0]
    n_pairs = len(pairs)
    midpoints = np.zeros((n_frames, n_pairs, 3), dtype=np.float32)
    # Start every (frame, limb) rotation as the identity.
    rotations = np.zeros((n_frames, n_pairs, 3, 3), dtype=np.float32)
    rotations[...] = np.eye(3, dtype=np.float32)

    for frame_i in range(n_frames):
        frame_kp = kp_seq[frame_i]
        for limb_i, (head_i, tail_i) in enumerate(pairs):
            head = frame_kp[head_i]
            tail = frame_kp[tail_i]
            midpoints[frame_i, limb_i] = 0.5 * (head + tail)
            span = tail - head
            span_len = float(np.linalg.norm(span))
            if span_len > 1e-9:
                rotations[frame_i, limb_i] = rotation_align(
                    rest_axes[limb_i], span / span_len,
                )

    quats = rotmat_to_quat_np(rotations).astype(np.float32)
    return midpoints, quats
def _build_openpose_sticks(
    bind_kp_m: np.ndarray, pairs: Tuple[Tuple[int, int], ...],
    half_width_m: float, pair_colors: np.ndarray,
    limb_joint_base_idx: int = 0,
    shape: str = "ellipsoid",
    smooth_shade: bool = False,
    end_width_frac: float = 0.3,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """One limb mesh per pair (a, b), each generated for its own length.

    Each limb is built by `_capsule_mesh_local` along +Y, rotated onto the
    a→b direction with `rotation_align`, and translated to keypoint a. Width:
    'capsule' uses `half_width_m` for every limb; any other shape additionally
    caps the width at 0.1 × limb length. Both are floored at 0.001.

    Every vertex is bound with weight 1 to joint
    `limb_joint_base_idx + limb_idx` and colored `pair_colors[limb_idx]`.
    Pairs shorter than 1e-6 are skipped but still consume their `limb_idx`,
    so joint and color indices stay aligned with `pairs`.

    Returns (verts, normals, faces, joints4, weights4, vert_colors) from
    `_finalize_skinned_mesh`, or six empty arrays when no limb was emitted.
    """
    up_axis = np.array([0.0, 1.0, 0.0], dtype=np.float32)
    width_ratio = 0.1
    min_width = 0.001
    uniform_width = str(shape) == "capsule"

    # One (verts, faces, joints, weights, colors) tuple per emitted limb.
    limb_chunks: List[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]] = []
    vert_cursor = 0
    for limb_idx, (head_i, tail_i) in enumerate(pairs):
        head = bind_kp_m[head_i]
        span = bind_kp_m[tail_i] - head
        length = float(np.linalg.norm(span))
        if length < 1e-6:
            continue
        to_world = rotation_align(up_axis, span / length)

        if uniform_width:
            # Uniform radius — every bone the same width (clamped internally).
            half_width_eff = max(min_width, half_width_m)
        else:
            # Auto-thin so short face/ear limbs aren't chunky next to body limbs.
            half_width_eff = max(min_width, min(length * width_ratio, half_width_m))

        # The head/tail blend weights from the generator are dropped: sticks
        # bind rigidly to a single per-limb joint instead.
        local_v, local_f, _ = _capsule_mesh_local(
            length, half_width_eff, shape=shape, end_width_frac=end_width_frac,
        )
        n_v = local_v.shape[0]

        limb_joints = np.zeros((n_v, 4), dtype=np.uint16)
        limb_joints[:, 0] = limb_idx + limb_joint_base_idx
        limb_weights = np.zeros((n_v, 4), dtype=np.float32)
        limb_weights[:, 0] = 1.0
        limb_colors = np.tile(pair_colors[limb_idx], (n_v, 1)).astype(np.float32)

        limb_chunks.append((
            local_v @ to_world.T + head,
            local_f + vert_cursor,  # re-base face ids into the merged buffer
            limb_joints,
            limb_weights,
            limb_colors,
        ))
        vert_cursor += n_v

    if not limb_chunks:
        return (np.zeros((0, 3), dtype=np.float32), np.zeros((0, 3), dtype=np.float32),
                np.zeros((0, 3), dtype=np.uint32), np.zeros((0, 4), dtype=np.uint16),
                np.zeros((0, 4), dtype=np.float32), np.zeros((0, 3), dtype=np.float32))

    verts, faces, joints, weights, colors = (
        np.concatenate(column, axis=0) for column in zip(*limb_chunks)
    )
    return _finalize_skinned_mesh(verts, faces, joints, weights, colors, smooth_shade)
def build_glb_openpose(
pose_data: Dict[str, Any],
*,
fps: float = 24.0,
camera_translation: str = "off",
track_index: int = -1,
marker_radius_m: float = 0.025,
stick_radius_m: float = 0.008,
include_hands: bool = False,
hand_marker_radius_m: float = 0.0,
hand_stick_radius_m: float = 0.0,
hand_color_style: str = "dwpose",
face_style: str = "disabled",
face_marker_radius_m: float = 0.0,
palette: str = "openpose",
shape: str = "ellipsoid",
smooth_shade: bool = False,
material_roughness: float = 0.85,
material_double_sided: bool = False,
stick_end_width_frac: float = 0.6,
bone_smooth_window: int = 0,
) -> bytes:
"""Build a GLB of an OpenPose-style 3D skeleton — sphere markers per keypoint
plus colored sticks between limb pairs, one armature per track. Body from
`pred_keypoints_3d`; optional hands (same source) and face landmarks
(`pred_vertices`) extend each armature.
Args:
include_hands: append the 21+21 OpenPose hand keypoints per track.
hand_marker_radius_m: hand sphere radius. 0 = auto = 0.4 × marker_radius_m.
hand_stick_radius_m: hand limb half-width. 0 = auto = 0.5 × stick_radius_m.
hand_color_style: 'dwpose' (default) = solid-blue dots + rainbow sticks;
'openpose' = rainbow dots AND sticks.
face_style: 'disabled' (default) | 'full' (~30 contour pts) | 'eyes_mouth'
(eyes + outer-lip subset); sampled at vertex IDs from
`canonical_colors["positions"]`.
face_marker_radius_m: face landmark sphere radius. 0 = auto = 0.3 ×
marker_radius_m. Rendered as dots only, no contour lines.
palette: 'openpose' = rainbow gradient per keypoint; 'scail' = warm right
/ cool left, grey centerline, distinct per-limb colors.
"""
is_scail = str(palette) == "scail"
# SCAIL drops the face bones (13..16) and eye/ear spheres; keeps nose (idx 0,
# the neck-stub tip) to cap the open cylinder. Matches the capsule render.
body_pairs = OPENPOSE_18_PAIRS[:13] if is_scail else OPENPOSE_18_PAIRS
body_sphere_kp = (np.arange(14, dtype=np.int64)
if is_scail else np.arange(18, dtype=np.int64))
if is_scail:
body_sphere_colors = SCAIL_KEYPOINT_COLORS_18
body_stick_colors = SCAIL_LIMB_COLORS_17
elif str(palette) == "openpose":
# Same rainbow array drives both spheres and sticks.
body_sphere_colors = OPENPOSE_RAINBOW_18
body_stick_colors = OPENPOSE_RAINBOW_18
else:
raise ValueError(
f"build_glb_openpose: unknown palette={palette!r} "
"(expected 'openpose' or 'scail')"
)
if float(hand_marker_radius_m) <= 0.0:
hand_marker_radius_m = float(marker_radius_m) * 0.4
if float(hand_stick_radius_m) <= 0.0:
hand_stick_radius_m = float(stick_radius_m) * 0.5
if float(face_marker_radius_m) <= 0.0:
face_marker_radius_m = float(marker_radius_m) * 0.3
if hand_color_style == "dwpose":
hand_sphere_colors = DWPOSE_HAND_COLORS_21
elif hand_color_style == "openpose":
hand_sphere_colors = OPENPOSE_HAND_COLORS_21
else:
raise ValueError(
f"build_glb_openpose: unknown hand_color_style="
f"{hand_color_style!r} (expected 'dwpose' or 'openpose')"
)
tracks = collect_tracks(pose_data, track_index)
if not tracks:
raise ValueError("build_glb_openpose: no valid tracks in pose_data")
# Eyes (6..13) + outer-lip ring (19..22) from FACE_LANDMARK_TARGETS.
_EYES_MOUTH_IDX = np.array([6, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22], dtype=np.int64)
face_vert_ids: Optional[np.ndarray] = None
face_target_idx = np.arange(len(FACE_LANDMARK_TARGETS), dtype=np.int64)
if face_style in ("full", "eyes_mouth"):
canonical_colors = pose_data.get("canonical_colors") or {}
positions = canonical_colors.get("positions")
if positions is None:
raise ValueError(
"build_glb_openpose: face_style needs "
"pose_data['canonical_colors']['positions'] (computed at "
"model load and attached by Predict). Ensure the SAM3DBody "
"Loader+Predict ran upstream of this node."
)
if face_style == "eyes_mouth":
face_target_idx = _EYES_MOUTH_IDX
face_vert_ids = select_face_landmark_vert_ids(
np.asarray(positions),
face_mask=canonical_colors.get("face_mask"),
)[face_target_idx]
elif face_style != "disabled":
raise ValueError(
f"build_glb_openpose: unknown face_style={face_style!r} "
"(expected 'disabled', 'full', or 'eyes_mouth')"
)
K_body = 18
K_hands = 42 if include_hands else 0
K_face = int(face_vert_ids.shape[0]) if face_vert_ids is not None else 0
K = K_body + K_hands + K_face
# Limb counts: one joint per stick pair. Limb joints carry translation +
# rotation so each capsule rotates rigidly with its limb (no LBS thinning).
K_body_limbs = len(body_pairs)
K_hand_limbs = len(OPENPOSE_HAND_PAIRS) if include_hands else 0
K_limbs = K_body_limbs + 2 * K_hand_limbs # face has no sticks
# Joint name list mirrors the keypoint stacking order: body → hands → face.
joint_names: List[str] = [f"openpose_{n}" for n in OPENPOSE_18_NAMES]
if include_hands:
joint_names.extend([f"openpose_R_{n}" for n in OPENPOSE_HAND21_NAMES])
joint_names.extend([f"openpose_L_{n}" for n in OPENPOSE_HAND21_NAMES])
if K_face > 0:
joint_names.extend([f"openpose_face_{FACE_LANDMARK_TARGETS[i][0]}"
for i in face_target_idx])
# Limb joint names, stacked body → R-hand → L-hand to match the limb
# joint ordering in skin.joints (after the K keypoint joints).
limb_names: List[str] = [
f"openpose_limb_{OPENPOSE_18_NAMES[a]}_{OPENPOSE_18_NAMES[b]}"
for (a, b) in body_pairs
]
if include_hands:
for side in ("R", "L"):
for (a, b) in OPENPOSE_HAND_PAIRS:
limb_names.append(
f"openpose_{side}hand_limb_"
f"{OPENPOSE_HAND21_NAMES[a]}_{OPENPOSE_HAND21_NAMES[b]}"
)
w = GLBWriter()
nodes: List[dict] = []
meshes: List[dict] = []
skins: List[dict] = []
materials: List[dict] = []
animations: List[dict] = []
scene_root_indices: List[int] = []
samplers: List[dict] = []
channels: List[dict] = []
for track_i, (person_k, frame_indices) in enumerate(tracks):
body_seq = _extract_openpose_keypoints(pose_data, frame_indices, person_k)
n_frames = body_seq.shape[0]
if n_frames == 0:
continue
seq_chunks: List[np.ndarray] = [body_seq]
if include_hands:
seq_chunks.append(_extract_openpose_hand_keypoints(
pose_data, frame_indices, person_k))
if face_vert_ids is not None:
seq_chunks.append(_extract_face_landmarks_from_verts(
pose_data, frame_indices, person_k, face_vert_ids))
kp_seq = np.concatenate(seq_chunks, axis=1) # (N, K, 3)
if bone_smooth_window and bone_smooth_window > 1:
kp_seq = gaussian_smooth_positions(kp_seq, int(bone_smooth_window))
# Static-bind = rig REST pose when available, else frame 0. The rest
# bind keeps static POSITION at rig origin so viewers auto-center there
# and the motion is visible (see _openpose_bind_at_rig_rest).
bind_kp_m_rest = _openpose_bind_at_rig_rest(
pose_data, include_hands=include_hands, face_vert_ids=face_vert_ids,
)
bind_kp_m = (bind_kp_m_rest if bind_kp_m_rest is not None
else kp_seq[0].astype(np.float32))
if is_scail: # nose → neck stub, matching the capsule render
kp_seq[:, :K_body] = _scail_redirect_neck_stub(kp_seq[:, :K_body])
bind_kp_m[:K_body] = _scail_redirect_neck_stub(bind_kp_m[:K_body])
person_root: Dict[str, Any] = {"name": f"track{track_i:02d}", "children": []}
nodes.append(person_root)
person_root_idx = len(nodes) - 1
scene_root_indices.append(person_root_idx)
# K keypoint joint nodes (spheres bind here, translation only).
joint_node_indices: List[int] = []
for j in range(K):
nodes.append({
"name": joint_names[j],
"translation": bind_kp_m[j].tolist(),
"rotation": [0.0, 0.0, 0.0, 1.0],
"scale": [1.0, 1.0, 1.0],
})
joint_node_indices.append(len(nodes) - 1)
person_root["children"].extend(joint_node_indices)
# Per-limb rest + per-frame TRS; sticks bind rigidly to these joints.
limb_rest_mids_list: List[np.ndarray] = []
limb_rest_axes_list: List[np.ndarray] = []
limb_anim_mids_list: List[np.ndarray] = []
limb_anim_quats_list: List[np.ndarray] = []
rmid_b, raxis_b = _openpose_limb_rest_trs(bind_kp_m[:K_body], body_pairs)
amid_b, aquat_b = _openpose_limb_anim_trs(kp_seq[:, :K_body], body_pairs, raxis_b)
limb_rest_mids_list.append(rmid_b)
limb_rest_axes_list.append(raxis_b)
limb_anim_mids_list.append(amid_b)
limb_anim_quats_list.append(aquat_b)
if include_hands:
for h_off in (K_body, K_body + 21):
rmid_h, raxis_h = _openpose_limb_rest_trs(
bind_kp_m[h_off:h_off + 21], OPENPOSE_HAND_PAIRS,
)
amid_h, aquat_h = _openpose_limb_anim_trs(
kp_seq[:, h_off:h_off + 21], OPENPOSE_HAND_PAIRS, raxis_h,
)
limb_rest_mids_list.append(rmid_h)
limb_rest_axes_list.append(raxis_h)
limb_anim_mids_list.append(amid_h)
limb_anim_quats_list.append(aquat_h)
limb_rest_mids = np.concatenate(limb_rest_mids_list, axis=0)
limb_anim_mids = np.concatenate(limb_anim_mids_list, axis=1)
limb_anim_quats = np.concatenate(limb_anim_quats_list, axis=1)
# Hemisphere-align consecutive quats so LINEAR interp takes the short path.
limb_anim_quats = quat_sign_fix_per_joint(limb_anim_quats).astype(np.float32)
limb_joint_indices: List[int] = []
for k in range(K_limbs):
nodes.append({
"name": limb_names[k],
"translation": limb_rest_mids[k].tolist(),
"rotation": [0.0, 0.0, 0.0, 1.0],
"scale": [1.0, 1.0, 1.0],
})
limb_joint_indices.append(len(nodes) - 1)
person_root["children"].extend(limb_joint_indices)
# Combined skin: keypoint joints then limb joints; IBM = T(-rest) for
# both, yielding identity skin_matrix at rest.
all_joint_indices = joint_node_indices + limb_joint_indices
ibm = np.tile(np.eye(4, dtype=np.float32), (K + K_limbs, 1, 1))
ibm[:K, :3, 3] = -bind_kp_m
if K_limbs > 0:
ibm[K:K + K_limbs, :3, 3] = -limb_rest_mids
ibm_acc = w.add_mat4_f32(ibm.transpose(0, 2, 1).astype(np.float32))
skins.append({
"joints": all_joint_indices,
"inverseBindMatrices": ibm_acc,
"skeleton": person_root_idx,
})
skin_idx = len(skins) - 1
# Per-group geometry. Spheres bind to keypoint joints [0, K); sticks to
# limb joints [K, K+K_limbs). Stacked body → R-hand → L-hand → face.
group_meshes: List[Tuple[np.ndarray, np.ndarray, np.ndarray,
np.ndarray, np.ndarray, np.ndarray]] = []
sp = _build_openpose_spheres(
bind_kp_m[body_sphere_kp], float(marker_radius_m),
body_sphere_colors[body_sphere_kp], base_joint_idx=0,
smooth_shade=smooth_shade,
joint_indices=body_sphere_kp,
)
st = _build_openpose_sticks(
bind_kp_m[:K_body], body_pairs, float(stick_radius_m),
body_stick_colors, limb_joint_base_idx=K, # body limbs start at K
shape=shape,
smooth_shade=smooth_shade,
end_width_frac=stick_end_width_frac,
)
group_meshes.append(sp)
group_meshes.append(st)
if include_hands:
# Hand sticks stay rainbow per-finger; only dots switch under 'dwpose'.
hand_pair_colors = _pair_colors_from_kp(
OPENPOSE_HAND_PAIRS, OPENPOSE_HAND_COLORS_21, endpoint=1,
)
for hand_i, h_off in enumerate((K_body, K_body + 21)): # right, then left
h_bind = bind_kp_m[h_off:h_off + 21]
group_meshes.append(_build_openpose_spheres(
h_bind, float(hand_marker_radius_m),
hand_sphere_colors, base_joint_idx=h_off,
smooth_shade=smooth_shade,
))
group_meshes.append(_build_openpose_sticks(
h_bind, OPENPOSE_HAND_PAIRS, float(hand_stick_radius_m),
hand_pair_colors,
limb_joint_base_idx=K + K_body_limbs + hand_i * K_hand_limbs,
shape=shape,
smooth_shade=smooth_shade,
end_width_frac=stick_end_width_frac,
))
if K_face > 0:
f_off = K_body + K_hands
f_bind = bind_kp_m[f_off:f_off + K_face]
# DWPose face = dots only, no contour lines.
group_meshes.append(_build_openpose_spheres(
f_bind, float(face_marker_radius_m),
FACE_LANDMARK_COLORS, base_joint_idx=f_off,
smooth_shade=smooth_shade,
))
primitives: List[dict] = []
for (v_arr, n_arr, f_arr, j_arr, w_arr, c_arr) in group_meshes:
if v_arr.shape[0] == 0:
continue
attrs = {
"POSITION": w.add_vec3_f32(v_arr),
"NORMAL": w.add_vec3_f32(n_arr),
"JOINTS_0": w.add_joints_u16(j_arr),
"WEIGHTS_0": w.add_weights_f32(w_arr),
"COLOR_0": w.add_vec3_f32(c_arr),
}
materials.append(make_lit_material(
roughness=material_roughness,
double_sided=material_double_sided,
))
primitives.append({
"attributes": attrs,
"indices": w.add_indices_u32(f_arr.reshape(-1)),
"mode": 4,
"material": len(materials) - 1,
})
if not primitives:
continue
meshes.append({"primitives": primitives})
nodes.append({
"name": f"track{track_i:02d}_openpose",
"mesh": len(meshes) - 1,
"skin": skin_idx,
})
person_root["children"].append(len(nodes) - 1)
times = np.asarray(frame_indices, dtype=np.float32) / float(fps)
time_acc = w.add_scalar_f32(times)
for j in range(K):
t_j = kp_seq[:, j, :].astype(np.float32)
if (np.ptp(t_j, axis=0) < 1e-6).all():
nodes[joint_node_indices[j]]["translation"] = t_j[0].tolist()
continue
acc = w.add_vec3_f32_anim(t_j)
samplers.append({"input": time_acc, "output": acc, "interpolation": "LINEAR"})
channels.append({
"sampler": len(samplers) - 1,
"target": {"node": joint_node_indices[j], "path": "translation"},
})
# Per-limb-joint translation + rotation; stationary limbs bake their
# constant TRS into the node instead of an animation channel.
for k in range(K_limbs):
t_k = limb_anim_mids[:, k, :].astype(np.float32)
if (np.ptp(t_k, axis=0) < 1e-6).all():
nodes[limb_joint_indices[k]]["translation"] = t_k[0].tolist()
else:
acc = w.add_vec3_f32_anim(t_k)
samplers.append({"input": time_acc, "output": acc,
"interpolation": "LINEAR"})
channels.append({
"sampler": len(samplers) - 1,
"target": {"node": limb_joint_indices[k], "path": "translation"},
})
q_k = limb_anim_quats[:, k, :].astype(np.float32)
# Plain ptp is fine — signs already aligned by quat_sign_fix_per_joint.
if (np.ptp(q_k, axis=0) < 1e-6).all():
nodes[limb_joint_indices[k]]["rotation"] = q_k[0].tolist()
else:
acc = w.add_vec4_f32(q_k)
samplers.append({"input": time_acc, "output": acc,
"interpolation": "LINEAR"})
channels.append({
"sampler": len(samplers) - 1,
"target": {"node": limb_joint_indices[k], "path": "rotation"},
})
if camera_translation != "off":
frames = pose_data["frames"]
cam_t = np.stack([
unflip(np.asarray(frames[t][person_k]["pred_cam_t"], dtype=np.float32))
for t in frame_indices
], axis=0)
if camera_translation == "centered" and cam_t.shape[0] > 0:
cam_t = cam_t - cam_t[0:1]
if (np.ptp(cam_t, axis=0) < 1e-6).all():
person_root["translation"] = cam_t[0].tolist()
else:
acc = w.add_vec3_f32_anim(cam_t)
samplers.append({"input": time_acc, "output": acc, "interpolation": "LINEAR"})
channels.append({
"sampler": len(samplers) - 1,
"target": {"node": person_root_idx, "path": "translation"},
})
if samplers:
animations.append({
"name": "all_tracks",
"samplers": samplers, "channels": channels,
})
if not scene_root_indices:
raise ValueError("build_glb_openpose: produced no tracks")
gltf: Dict[str, Any] = {
"asset": {"version": "2.0", "generator": "ComfyUI-SAM3DBody"},
"scene": 0,
"scenes": [{"nodes": scene_root_indices}],
"nodes": nodes,
"meshes": meshes,
"skins": skins,
}
if materials:
gltf["materials"] = materials
if animations:
gltf["animations"] = animations
return w.to_bytes(gltf)