ComfyUI/comfy_extras/sam3d_body/export/glb_shared.py

1167 lines
48 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""GLB export for SAM 3D Body pose_data.
Mode: skeletal — rebuilds the MHR 127-bone rig. Per-frame local TRS comes from
re-running param_transform on saved mhr_model_params; rest verts from a
zero-pose forward with the person's shape_params; sparse triplet skinning is
compacted to glTF's max-4-influences form; facial expression is re-exposed as
72 morph targets driven by expr_params.
pred_vertices/pred_cam_t are camera-y-down — un-flipped here so the GLB lives
in glTF-spec Y-up. Pose correctives are dropped (glTF skinning can't represent
them); deformation at extreme joint angles will differ from the SAM3DBody
renderer by the corrective amount.
"""
from __future__ import annotations
import json
import struct
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import torch
from comfy_extras.sam3d_body.rasterizer import rainbow_colors_from_canonical
# fp32-rounded ln(2). Used as `exp(x * _LN2)` to compute 2**x bit-identically
# to the rig's own `torch.exp(jp[..., 6:7] * _LN2)`. Consumed below when the
# per-joint log2-scale channel is turned into a linear scale factor.
_LN2 = 0.6931471824645996
# Quaternion / rotation helpers (xyzw convention, matching MHR rig)
def _euler_xyz_to_quat_np(angles: np.ndarray) -> np.ndarray:
"""(roll, pitch, yaw) -> (x, y, z, w). Mirrors mhr_rig._euler_xyz_to_quat."""
roll, pitch, yaw = angles[..., 0], angles[..., 1], angles[..., 2]
cy, sy = np.cos(yaw * 0.5), np.sin(yaw * 0.5)
cp, sp = np.cos(pitch * 0.5), np.sin(pitch * 0.5)
cr, sr = np.cos(roll * 0.5), np.sin(roll * 0.5)
x = sr * cp * cy - cr * sp * sy
y = cr * sp * cy + sr * cp * sy
z = cr * cp * sy - sr * sp * cy
w = cr * cp * cy + sr * sp * sy
return np.stack([x, y, z, w], axis=-1)
def _quat_multiply_np(q1: np.ndarray, q2: np.ndarray) -> np.ndarray:
"""xyzw product. Mirrors mhr_rig._quat_multiply."""
x1, y1, z1, w1 = q1[..., 0], q1[..., 1], q1[..., 2], q1[..., 3]
x2, y2, z2, w2 = q2[..., 0], q2[..., 1], q2[..., 2], q2[..., 3]
x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
return np.stack([x, y, z, w], axis=-1)
def _quat_rotate_np(q: np.ndarray, v: np.ndarray) -> np.ndarray:
"""Rotate v by unit xyzw q. Mirrors mhr_rig._quat_rotate."""
axis = q[..., :3]
r = q[..., 3:4]
av = np.cross(axis, v, axis=-1)
aav = np.cross(axis, av, axis=-1)
return v + 2.0 * (av * r + aav)
def _skel_state_inverse_np(skel_state: np.ndarray) -> np.ndarray:
    """Invert a (..., 8) skel state laid out as [t.xyz | q.xyzw | s].

    The quaternion is normalized before conjugation, so non-unit input is
    tolerated. A scale whose magnitude is <= 1e-12 is treated as 1 to avoid
    dividing by zero.
    """
    translation = skel_state[..., :3]
    rotation = skel_state[..., 3:7]
    scale = skel_state[..., 7:8]

    rotation_len = np.linalg.norm(rotation, axis=-1, keepdims=True)
    unit_rotation = rotation / np.maximum(rotation_len, 1e-12)
    inv_scale = 1.0 / np.where(np.abs(scale) > 1e-12, scale, 1.0)

    # Conjugate of a unit quaternion is its inverse.
    conjugate = np.concatenate([-unit_rotation[..., :3], unit_rotation[..., 3:4]], axis=-1)
    inv_translation = -inv_scale * _quat_rotate_np(conjugate, translation)
    return np.concatenate([inv_translation, conjugate, inv_scale], axis=-1)
def _skel_state_compose_np(s1: np.ndarray, s2: np.ndarray) -> np.ndarray:
    """Compose two (..., 8) skel states: result = s1 ∘ s2 (s2 applied first).

    Mirrors mhr_rig._skel_multiply.
    """

    def _unit(quat: np.ndarray) -> np.ndarray:
        # Defensive normalization, matching the rig's `F.normalize` calls.
        return quat / np.maximum(np.linalg.norm(quat, axis=-1, keepdims=True), 1e-12)

    outer_t, outer_s = s1[..., :3], s1[..., 7:8]
    inner_t, inner_s = s2[..., :3], s2[..., 7:8]
    outer_q = _unit(s1[..., 3:7])
    inner_q = _unit(s2[..., 3:7])

    translation = outer_t + outer_s * _quat_rotate_np(outer_q, inner_t)
    rotation = _quat_multiply_np(outer_q, inner_q)
    scale = outer_s * inner_s
    return np.concatenate([translation, rotation, scale], axis=-1)
def gaussian_smooth_quats(q_seq: np.ndarray, window: int) -> np.ndarray:
    """Temporally smooth a (N, NJ, 4) quaternion sequence with a Gaussian kernel.

    Quaternions are first sign-aligned per joint, then each component is
    convolved along the time axis and the result is renormalized. With
    `window <= 1` or fewer than two frames the input is returned untouched.
    The kernel spans `2 * (window // 2) + 1` taps with
    sigma = max(0.5, window / 4).
    """
    frame_count = q_seq.shape[0]
    if window <= 1 or frame_count < 2:
        return q_seq

    aligned = quat_sign_fix_per_joint(q_seq).astype(np.float64)

    radius = window // 2
    sigma = max(0.5, window / 4.0)
    taps = np.arange(-radius, radius + 1, dtype=np.float64)
    weights = np.exp(-taps * taps / (2.0 * sigma * sigma))
    weights = weights / weights.sum()

    # Replicate the first/last frame so endpoints are not pulled toward zero.
    pad_spec = [(radius, radius)] + [(0, 0)] * (aligned.ndim - 1)
    padded = np.pad(aligned, pad_spec, mode="edge")

    smoothed = np.zeros_like(aligned)
    for shift, weight in enumerate(weights):
        smoothed += weight * padded[shift:shift + frame_count]

    lengths = np.linalg.norm(smoothed, axis=-1, keepdims=True)
    smoothed = smoothed / np.maximum(lengths, 1e-12)
    return smoothed.astype(np.float32)
def gaussian_smooth_positions(seq: np.ndarray, window: int) -> np.ndarray:
    """Temporally smooth a (N, K, 3) position sequence with a Gaussian kernel.

    Edge frames are replicated for padding. With `window <= 1` or fewer than
    two frames the input is returned untouched; otherwise the result is a new
    float32 array. Used to calm jittery keypoint tracks before the openpose
    rig derives sphere translations + limb TRS from them.
    """
    if window <= 1 or seq.shape[0] < 2:
        return seq

    samples = np.asarray(seq, dtype=np.float64)
    frame_count = samples.shape[0]
    radius = window // 2
    sigma = max(0.5, window / 4.0)

    taps = np.arange(-radius, radius + 1, dtype=np.float64)
    weights = np.exp(-taps * taps / (2.0 * sigma * sigma))
    weights = weights / weights.sum()

    pad_spec = [(radius, radius)] + [(0, 0)] * (samples.ndim - 1)
    padded = np.pad(samples, pad_spec, mode="edge")

    smoothed = np.zeros_like(samples)
    for shift, weight in enumerate(weights):
        smoothed += weight * padded[shift:shift + frame_count]
    return smoothed.astype(np.float32)
def quat_sign_fix_per_joint(q_seq: np.ndarray) -> np.ndarray:
    """Make a (N, NJ, 4) quaternion sequence hemisphere-consistent over time.

    Frames are visited in order; any joint whose quaternion has a negative
    dot product with the (already fixed) previous frame is negated. This
    removes long-path slerp glitches (mid-animation cartwheel flips). Work is
    done in fp64 on a normalized copy; the result is returned as float32.
    """
    fixed = np.array(q_seq, dtype=np.float64, copy=True)
    fixed = fixed / np.maximum(np.linalg.norm(fixed, axis=-1, keepdims=True), 1e-12)
    for frame in range(1, fixed.shape[0]):
        agreement = (fixed[frame - 1] * fixed[frame]).sum(axis=-1)
        opposite = agreement < 0.0
        # q and -q encode the same rotation, so negating is lossless.
        fixed[frame, opposite] = -fixed[frame, opposite]
    return fixed.astype(np.float32)
def bone_locals_from_globals(rig_global: np.ndarray, parents: np.ndarray) -> np.ndarray:
    """Derive per-bone local TRS (N, NJ, 8) from global skel states (N, NJ, 8).

    For a joint with a valid parent, local = inverse(parent_global) ∘
    child_global, so FK over (parents, local) reproduces `rig_global` exactly
    even if `parents` does not match the rig's own pmi-walk. A joint whose
    parent index is out of range or equal to itself is treated as a root and
    keeps its global state.
    """
    _, joint_count, _ = rig_global.shape
    locals_out = np.zeros_like(rig_global)
    for joint in range(joint_count):
        parent = int(parents[joint])
        is_root = parent < 0 or parent >= joint_count or parent == joint
        if is_root:
            locals_out[:, joint] = rig_global[:, joint]
            continue
        to_parent_space = _skel_state_inverse_np(rig_global[:, parent])
        locals_out[:, joint] = _skel_state_compose_np(to_parent_space, rig_global[:, joint])
    return locals_out
def _quat_to_mat3_np(q: np.ndarray) -> np.ndarray:
x, y, z, w = q[..., 0], q[..., 1], q[..., 2], q[..., 3]
n = x * x + y * y + z * z + w * w
s = np.where(n > 0, 2.0 / n, 0.0)
R = np.empty(q.shape[:-1] + (3, 3), dtype=q.dtype)
R[..., 0, 0] = 1 - s * (y * y + z * z)
R[..., 0, 1] = s * (x * y - z * w)
R[..., 0, 2] = s * (x * z + y * w)
R[..., 1, 0] = s * (x * y + z * w)
R[..., 1, 1] = 1 - s * (x * x + z * z)
R[..., 1, 2] = s * (y * z - x * w)
R[..., 2, 0] = s * (x * z - y * w)
R[..., 2, 1] = s * (y * z + x * w)
R[..., 2, 2] = 1 - s * (x * x + y * y)
return R
def collect_tracks(pose_data: Dict[str, Any], track_index: int) -> List[Tuple[int, List[int]]]:
    """Return [(person_index, frame_indices), ...] for the requested track(s).

    A negative `track_index` selects every person slot that appears in any
    frame; otherwise only that slot is considered. A frame counts for slot k
    when it holds more than k persons. Slots with no frames are omitted, and
    an out-of-range `track_index` yields an empty list. The same person index
    across frames is assumed to be the same subject.
    """
    frames = pose_data["frames"]
    slot_count = max(map(len, frames), default=0)
    if slot_count == 0:
        return []

    if track_index < 0:
        candidates = list(range(slot_count))
    elif track_index < slot_count:
        candidates = [track_index]
    else:
        return []

    result: List[Tuple[int, List[int]]] = []
    for slot in candidates:
        present = [idx for idx, frame in enumerate(frames) if len(frame) > slot]
        if present:
            result.append((slot, present))
    return result
# glTF binary builder
# glTF accessor `componentType` codes.
_FLOAT = 5126  # 32-bit float
_USHORT = 5123  # unsigned 16-bit integer
_UINT = 5125  # unsigned 32-bit integer
# glTF bufferView `target` hints.
_BYTE_ARRAY = 34962  # ARRAY_BUFFER: vertex attribute data
_BYTE_ELEMENT = 34963  # ELEMENT_ARRAY_BUFFER: index data
def _pad4(buf: bytes, fill: bytes = b"\x00") -> bytes:
n = (4 - (len(buf) % 4)) % 4
return buf + fill * n
class GLBWriter:
    """Accumulates binary payloads and their bufferView/accessor records, then
    serializes everything into a single .glb container.

    Every `add_*` method appends one bufferView plus one accessor and returns
    the new accessor's index.
    """

    def __init__(self) -> None:
        self._buffer = bytearray()
        self.bufferViews: List[dict] = []
        self.accessors: List[dict] = []

    def _add_view(self, data: bytes, *, target: Optional[int] = None) -> int:
        """Append `data` to the binary buffer and register a bufferView for it.

        The buffer is zero-padded to a 4-byte boundary afterwards so the next
        view starts aligned; the padding is not part of this view's
        byteLength. Returns the bufferView index.
        """
        start = len(self._buffer)
        self._buffer.extend(data)
        self._buffer.extend(b"\x00" * (-len(self._buffer) % 4))
        view = {"buffer": 0, "byteOffset": start, "byteLength": len(data)}
        if target is not None:
            view["target"] = target
        self.bufferViews.append(view)
        return len(self.bufferViews) - 1

    def _add_accessor(self, view_idx: int, component_type: int, count: int,
                      type_name: str, **bounds: Any) -> int:
        """Register an accessor over `view_idx`; `bounds` may carry min/max."""
        record = {
            "bufferView": view_idx, "componentType": component_type,
            "count": count, "type": type_name,
        }
        record.update(bounds)
        self.accessors.append(record)
        return len(self.accessors) - 1

    def add_vec3_f32(self, arr: np.ndarray, *, target: int = _BYTE_ARRAY) -> int:
        """VEC3 float32 accessor with per-component min/max."""
        data = np.ascontiguousarray(arr, dtype=np.float32)
        view_idx = self._add_view(data.tobytes(), target=target)
        return self._add_accessor(
            view_idx, _FLOAT, data.shape[0], "VEC3",
            min=data.min(axis=0).tolist(), max=data.max(axis=0).tolist(),
        )

    def add_vec3_f32_no_minmax(self, arr: np.ndarray) -> int:
        """Morph-target POSITIONs: spec lets us skip min/max, avoiding a
        per-frame delta bbox."""
        data = np.ascontiguousarray(arr, dtype=np.float32)
        view_idx = self._add_view(data.tobytes(), target=_BYTE_ARRAY)
        return self._add_accessor(view_idx, _FLOAT, data.shape[0], "VEC3")

    def add_indices_u32(self, arr: np.ndarray) -> int:
        """Flattened uint32 index accessor (element-array target)."""
        data = np.ascontiguousarray(arr, dtype=np.uint32).reshape(-1)
        view_idx = self._add_view(data.tobytes(), target=_BYTE_ELEMENT)
        return self._add_accessor(view_idx, _UINT, int(data.size), "SCALAR")

    def add_scalar_f32(self, arr: np.ndarray) -> int:
        """Flattened float32 SCALAR accessor with min/max ([0.0] when empty)."""
        data = np.ascontiguousarray(arr, dtype=np.float32).reshape(-1)
        view_idx = self._add_view(data.tobytes())
        return self._add_accessor(
            view_idx, _FLOAT, int(data.size), "SCALAR",
            min=[float(data.min())] if data.size else [0.0],
            max=[float(data.max())] if data.size else [0.0],
        )

    def add_scalar_f32_flat(self, arr: np.ndarray, count: int) -> int:
        """Animation-output scalars: `count` is keyframes, not floats. Morph-
        target weight tracks store N_morph weights per keyframe as flat float32
        with count=N_keyframes."""
        data = np.ascontiguousarray(arr, dtype=np.float32).reshape(-1)
        view_idx = self._add_view(data.tobytes())
        return self._add_accessor(view_idx, _FLOAT, int(count), "SCALAR")

    def add_vec3_f32_anim(self, arr: np.ndarray) -> int:
        """VEC3 float32 accessor without a target hint or min/max."""
        data = np.ascontiguousarray(arr, dtype=np.float32)
        view_idx = self._add_view(data.tobytes())
        return self._add_accessor(view_idx, _FLOAT, data.shape[0], "VEC3")

    def add_vec4_f32(self, arr: np.ndarray) -> int:
        """VEC4 float32 accessor without a target hint."""
        data = np.ascontiguousarray(arr, dtype=np.float32)
        view_idx = self._add_view(data.tobytes())
        return self._add_accessor(view_idx, _FLOAT, data.shape[0], "VEC4")

    def add_mat4_f32(self, arr: np.ndarray) -> int:
        """MAT4 float32 accessor; bytes are written in the array's C order."""
        data = np.ascontiguousarray(arr, dtype=np.float32)
        view_idx = self._add_view(data.tobytes())
        return self._add_accessor(view_idx, _FLOAT, data.shape[0], "MAT4")

    def add_joints_u16(self, arr: np.ndarray) -> int:
        """VEC4 uint16 joint-index vertex attribute."""
        data = np.ascontiguousarray(arr, dtype=np.uint16)
        view_idx = self._add_view(data.tobytes(), target=_BYTE_ARRAY)
        return self._add_accessor(view_idx, _USHORT, data.shape[0], "VEC4")

    def add_weights_f32(self, arr: np.ndarray) -> int:
        """VEC4 float32 skin-weight vertex attribute."""
        data = np.ascontiguousarray(arr, dtype=np.float32)
        view_idx = self._add_view(data.tobytes(), target=_BYTE_ARRAY)
        return self._add_accessor(view_idx, _FLOAT, data.shape[0], "VEC4")

    def to_bytes(self, gltf: dict) -> bytes:
        """Serialize `gltf` plus the accumulated buffer into GLB bytes.

        Mutates `gltf` by setting its "buffers", "bufferViews" and
        "accessors" entries. The JSON chunk is space-padded and the BIN chunk
        zero-padded to 4-byte multiples.
        """
        gltf["buffers"] = [{"byteLength": len(self._buffer)}]
        gltf["bufferViews"] = self.bufferViews
        gltf["accessors"] = self.accessors

        json_payload = _pad4(json.dumps(gltf, separators=(",", ":")).encode("utf-8"), fill=b" ")
        bin_payload = _pad4(bytes(self._buffer))

        # 12-byte file header + two chunks, each with an 8-byte chunk header.
        total_length = 12 + 8 + len(json_payload) + 8 + len(bin_payload)
        parts = [
            struct.pack("<4sII", b"glTF", 2, total_length),
            struct.pack("<II", len(json_payload), 0x4E4F534A),  # "JSON"
            json_payload,
            struct.pack("<II", len(bin_payload), 0x004E4942),  # "BIN\0"
            bin_payload,
        ]
        return b"".join(parts)
# Inverse of mhr_head's `verts[..., [1, 2]] *= -1`: camera-y-down → glTF Y-up.
def unflip(arr: np.ndarray) -> np.ndarray:
    """Return a float32 copy of `arr` with the Y and Z components negated.

    The input is left untouched; components are taken along the last axis.
    """
    flipped = np.array(arr, dtype=np.float32, copy=True)
    flipped[..., [1, 2]] *= -1.0
    return flipped
# Shader preset names recognised by the vertex-color baking path below.
# "default" bakes nothing (the viewer's material shades the mesh); the
# rainbow variants produce per-vertex colors.
# NOTE(review): this set is not consulted anywhere in the visible part of the
# file — presumably callers use it to validate the shader name; confirm.
_BAKEABLE_SHADERS = {
    "default", "rainbow",
    "rainbow_face_normal", "rainbow_face_semantic",
}
def bake_vertex_colors(
    canonical_colors: Optional[Dict[str, np.ndarray]],
    shader: str,
    rainbow_tilt_x_deg: float,
    rainbow_tilt_z_deg: float,
    pastel_mix: float,
) -> Optional[np.ndarray]:
    """Bake per-vertex RGB for a shader preset on the canonical mesh.

    Returns a float32 array clipped to [0, 1], or None when `shader` is
    "default" or no canonical color data is supplied (the viewer's default
    material then handles shading).

    Every non-default shader starts from the rainbow colors of
    `canonical_colors["positions"]`. For the two face variants, when a
    non-empty "face_mask" is present:
      * "rainbow_face_normal" copies `canonical_colors["norm"]` onto the
        masked vertices;
      * "rainbow_face_semantic" copies `canonical_colors["face_region_rgb"]`
        onto every vertex whose region color is non-black.
    Finally the colors are blended toward white by `pastel_mix`, which is
    clamped to [0, 1].
    """
    if canonical_colors is None or shader == "default":
        return None

    base_positions = np.asarray(canonical_colors["positions"], dtype=np.float32)
    colors = rainbow_colors_from_canonical(
        base_positions, tilt_x_deg=rainbow_tilt_x_deg, tilt_z_deg=rainbow_tilt_z_deg,
    ).copy()

    wants_face_override = shader in ("rainbow_face_normal", "rainbow_face_semantic")
    face_mask = canonical_colors.get("face_mask") if wants_face_override else None
    if face_mask is not None and np.asarray(face_mask).any():
        if shader == "rainbow_face_normal":
            normal_rgb = np.asarray(canonical_colors["norm"], dtype=np.float32)
            colors[face_mask] = normal_rgb[face_mask]
        else:  # rainbow_face_semantic
            region_rgb = np.asarray(canonical_colors["face_region_rgb"], dtype=np.float32)
            has_region = region_rgb.sum(axis=1) > 0
            colors[has_region] = region_rgb[has_region]

    # SCAIL-style per-person pastel mix toward white (track 0 = full color).
    mix = max(0.0, min(1.0, float(pastel_mix)))
    if mix > 0:
        colors = colors * (1.0 - mix) + mix
    return np.clip(colors, 0.0, 1.0).astype(np.float32)
def compute_pastel_mix(track_i: int, falloff: float) -> float:
    """SCAIL-style desaturation amount for a track.

    Track 0 gets 0.0; track k gets 1 - falloff**k, with `falloff` clamped to
    [0.1, 1.0] first.
    """
    base = max(0.1, min(1.0, float(falloff)))
    if track_i == 0:
        return 0.0
    return 1.0 - base ** track_i
def compute_normals(verts: np.ndarray, faces: np.ndarray) -> np.ndarray:
    """Area-weighted per-vertex unit normals for a triangle mesh.

    Each face's (unnormalized) cross-product normal is accumulated onto its
    three corners, then every vertex sum is normalized. Vertices whose
    accumulated normal is shorter than 1e-8 are left unscaled. Returns float32
    with the same shape as `verts`.
    """
    corner_a = verts[faces[:, 0]]
    edge_ab = verts[faces[:, 1]] - corner_a
    edge_ac = verts[faces[:, 2]] - corner_a
    face_normals = np.cross(edge_ab, edge_ac).astype(np.float32)

    vertex_normals = np.zeros_like(verts, dtype=np.float32)
    for corner in range(3):
        # add.at accumulates correctly when a vertex index repeats.
        np.add.at(vertex_normals, faces[:, corner], face_normals)

    lengths = np.linalg.norm(vertex_normals, axis=1, keepdims=True)
    lengths[lengths < 1e-8] = 1.0
    return (vertex_normals / lengths).astype(np.float32)
def _parents_from_pmi(rig: Any) -> np.ndarray:
    """Recover a per-joint parent index array from the rig's `skel_pmi` table.

    pmi is (2, 266): row 0 = child, row 1 = parent, split into BFS levels by
    `skel_pmi_buffer_sizes`. Joints that never appear as a child keep -1
    (root). Returns (NUM_JOINTS,) int32.
    """
    joint_count = int(rig.NUM_JOINTS)
    pmi = rig.skel_pmi.cpu().numpy()
    level_sizes = rig.skel_pmi_buffer_sizes.cpu().numpy().tolist()

    parents = np.full(joint_count, -1, dtype=np.int32)
    start = 0
    for size in level_sizes:
        stop = start + size
        if size > 0:
            children = pmi[0, start:stop].astype(np.int64)
            parents[children] = pmi[1, start:stop].astype(np.int64)
        start = stop
    return parents
def _get_skeleton_override(pose_data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""Return ``_skeleton_override`` dict if present. Non-MHR skeletons supply
this to bypass MHR rig extraction (see ComfyUI-Kimodo). Required keys:
parents: (NJ,) int32, -1 = root
bind_global_m: (NJ, 8) f32 — [t.xyz | q.xyzw | scale], meters
lbs_compact_joints: (V, 8) uint16 — pre-compacted skin influences
lbs_compact_weights: (V, 8) f32
lbs_compact_max_inf: int — actual max influences (≤ 8)
rest_verts_m: (V, 3) f32
faces: (F, 3) uint32
Optional:
per_frame_y_down: bool — set False if pred_joint_coords are already
rig-native Y-up (kimodo). Default True (MHR).
openpose18_joint_indices: (18, 2) int32 — body OpenPose-18 → joint
index pair, resolved against per-frame
`pred_joint_coords`. Each row is
(joint_a, joint_b); b == -1 = single
joint, else default midpoint of the two
(lets producers approximate keypoints
with no matching joint, e.g. Nose ≈
midpoint(LeftEye, RightEye)). Enables
`SAM3DBody_ToGLB(mode="openpose")` on
external rigs.
openpose18_joint_weights: (18,) f32 — optional per-keypoint blend
weight for the (a, b) mapping above.
Position = w*joints[a] + (1-w)*joints[b]
when b ≥ 0 (default w=0.5 → midpoint).
Values outside [0, 1] EXTRAPOLATE past
the line segment — used to approximate
landmarks with no nearby joint pair
(e.g. ears: w=2.0 along the eye→eye
axis puts each ear one eye-distance
outside the corresponding eye). Ignored
for single-joint rows (b = -1).
openpose_hand21_r_joint_indices: (21, 2) int32 — right-hand OpenPose-21
(wrist + 5 fingers × 4 joints, base→tip)
→ joint index pair. Required (alongside
the L counterpart) for openpose mode
with include_hands=True.
openpose_hand21_l_joint_indices: (21, 2) int32 — left-hand counterpart.
openpose_hand21_r_joint_weights: (21,) f32 — optional, same semantics as
`openpose18_joint_weights`.
openpose_hand21_l_joint_weights: (21,) f32 — optional, same as above.
"""
if pose_data is None:
return None
return pose_data.get("_skeleton_override")
def extract_rig_static(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> Dict[str, np.ndarray]:
    """Collect the rig's static (pose-independent) buffers as numpy values.

    When `pose_data` carries a `_skeleton_override`, the returned dict is
    built from that override alone and `model` is not touched; it is flagged
    with `_external=True` and has no expression/shape bases. Otherwise the
    buffers are read from `model.head_pose.mhr` (unwrapping `model.model`
    when present) and `_external` is False.
    """
    override = _get_skeleton_override(pose_data)
    if override is not None:
        # External rig: caller pre-compacts skin and supplies bind global
        # directly, so MHR's PCA pose / expression bases are not needed.
        ext_parents = np.asarray(override["parents"], dtype=np.int32)
        ext_rest = np.asarray(override["rest_verts_m"], dtype=np.float32)
        ext_joints = np.asarray(override["lbs_compact_joints"], dtype=np.uint16)
        ext_weights = np.asarray(override["lbs_compact_weights"], dtype=np.float32)
        ext_faces = np.asarray(override["faces"], dtype=np.uint32)
        return {
            "parents": ext_parents,
            "parents_pmi": ext_parents,
            "lbs_compact_joints": ext_joints,
            "lbs_compact_weights": ext_weights,
            "lbs_compact_max_inf": int(override.get("lbs_compact_max_inf", 4)),
            "faces": ext_faces,
            "num_joints": int(ext_parents.shape[0]),
            "num_verts": int(ext_rest.shape[0]),
            "num_expr": 0,
            "num_shape": 0,
            "_external": True,
        }

    core = model.model if hasattr(model, "model") else model
    head = core.head_pose
    rig = core.head_pose.mhr

    def to_numpy(tensor: torch.Tensor) -> np.ndarray:
        return tensor.cpu().numpy()

    # `skel_joint_parents` encodes the anatomical hierarchy; the pmi-derived
    # order is BFS-optimized for parallel FK and may include traversal quirks.
    anatomical_parents = to_numpy(rig.skel_joint_parents).astype(np.int32)
    return {
        "parents": anatomical_parents,                                       # (127,) int32, -1 = root
        "parents_pmi": _parents_from_pmi(rig),                               # kept for FK-related uses
        "joint_translation_offsets": to_numpy(rig.skel_joint_translation_offsets),  # (127, 3) cm
        "joint_prerotations": to_numpy(rig.skel_joint_prerotations),         # (127, 4) xyzw
        "param_transform": to_numpy(rig.param_transform),                    # (889, 249)
        "lbs_inverse_bind_pose": to_numpy(rig.lbs_inverse_bind_pose),        # (127, 8)
        "lbs_skin_weights": to_numpy(rig.lbs_skin_weights),                  # (NNZ,)
        "lbs_skin_indices": to_numpy(rig.lbs_skin_indices).astype(np.int64),  # (NNZ,)
        "lbs_vert_indices": to_numpy(rig.lbs_vert_indices).astype(np.int64),  # (NNZ,)
        "expr_basis": to_numpy(rig.expr_basis),                              # (72, 18439, 3)
        "faces": to_numpy(head.faces).astype(np.uint32),                     # (36874, 3)
        "num_joints": int(rig.NUM_JOINTS),
        "num_verts": int(rig.NUM_VERTS),
        "num_expr": int(rig.NUM_EXPR),
        "num_shape": int(rig.NUM_IDENTITY),
        "_external": False,
    }
def compact_skin_to_n(
    skin_indices: np.ndarray, vert_indices: np.ndarray, weights: np.ndarray,
    num_verts: int, max_inf: int = 8,
) -> Tuple[np.ndarray, np.ndarray, int]:
    """Densify sparse (joint, vert, weight) triplets into per-vertex tables.

    Returns `(joints[V, max_inf] uint16, weights[V, max_inf] float32,
    actual_max)`. For each vertex the `max_inf` largest weights are kept
    (ties keep input order), stored in descending order, and renormalized to
    sum to 1. A vertex whose kept weights do not sum to a positive value is
    bound fully to joint slot 0. `actual_max` is the largest number of
    influences any vertex has after truncation, letting the caller skip
    JOINTS_1/WEIGHTS_1 when nothing exceeds 4 influences.
    """
    joints = np.zeros((num_verts, max_inf), dtype=np.uint16)
    out_w = np.zeros((num_verts, max_inf), dtype=np.float32)
    influences_per_vert = np.zeros(num_verts, dtype=np.int32)

    if vert_indices.size:
        # lexsort's LAST key is primary: group by vertex, heaviest first inside.
        perm = np.lexsort((-weights, vert_indices))
        verts_sorted = vert_indices[perm]
        joints_sorted = skin_indices[perm]
        weights_sorted = weights[perm]

        total = verts_sorted.size
        row = np.arange(total, dtype=np.int64)
        # True on the first row of every vertex run.
        run_begins = np.empty(total, dtype=bool)
        run_begins[0] = True
        run_begins[1:] = verts_sorted[1:] != verts_sorted[:-1]
        # Carry each run's first row index forward, giving the rank in its run.
        run_first_row = np.maximum.accumulate(np.where(run_begins, row, 0))
        slot = row - run_first_row

        kept = slot < max_inf
        kept_verts = verts_sorted[kept]
        kept_slots = slot[kept]
        joints[kept_verts, kept_slots] = joints_sorted[kept].astype(np.uint16, copy=False)
        out_w[kept_verts, kept_slots] = weights_sorted[kept].astype(np.float32, copy=False)

        raw_counts = np.bincount(verts_sorted, minlength=num_verts)
        np.minimum(raw_counts, max_inf, out=influences_per_vert, casting="unsafe")

    row_sums = out_w.sum(axis=1, keepdims=True)
    has_weight = row_sums[:, 0] > 0
    out_w[has_weight] /= row_sums[has_weight]
    unbound = ~has_weight
    if unbound.any():
        out_w[unbound, 0] = 1.0

    actual_max = int(influences_per_vert.max()) if influences_per_vert.size else 0
    return joints, out_w, actual_max
def zero_pose_rest_verts(
    model: Any, shape_params: np.ndarray, expr_zero: bool = True,
    pose_data: Optional[Dict[str, Any]] = None,
) -> np.ndarray:
    """Rest-pose vertices (V, 3) float32 in meters for one subject.

    With a `_skeleton_override` in `pose_data`, its `rest_verts_m` is
    returned directly (there is no PCA shape space to expand). Otherwise the
    MHR rig is evaluated with all-zero model parameters and expression, the
    given `shape_params`, and correctives disabled; the rig's centimeter
    output is divided by 100.

    `expr_zero` is accepted for interface compatibility but is not read here.
    """
    override = _get_skeleton_override(pose_data)
    if override is not None:
        return np.asarray(override["rest_verts_m"], dtype=np.float32)

    core = model.model if hasattr(model, "model") else model
    rig = core.head_pose.mhr
    first_param = next(rig.parameters())
    dtype = first_param.dtype
    device = rig.scale_mean.device if hasattr(rig, "scale_mean") else first_param.device

    identity = torch.from_numpy(np.ascontiguousarray(shape_params, dtype=np.float32)).to(device)
    if identity.ndim == 1:
        identity = identity.unsqueeze(0)

    # mhr.forward(identity_coeffs, model_parameters, expr_coeffs):
    #   identity_rest = base_shape + identity_basis @ shape;
    #   cat([model_params, zeros]) through param_transform; expr added.
    zero_pose = torch.zeros(1, 204, device=device, dtype=dtype)
    zero_expr = torch.zeros(1, 72, device=device, dtype=dtype)
    verts_cm, _ = rig(identity.to(dtype), zero_pose, zero_expr, apply_correctives=False)

    # Rig outputs cm; mhr_head divides by 100 for meters. Match that.
    verts_m = verts_cm[0].cpu().float().numpy() / 100.0
    return verts_m.astype(np.float32)
def global_skel_state_per_frame(
    model: Any, mhr_model_params: np.ndarray,
) -> np.ndarray:
    """Run rig FK over a batch of mhr_model_params.

    Returns the rig's skel state as float32 numpy, (N, NJ, 8) = (t cm,
    q xyzw, scale). Bones are shape- and expression-independent, so zeros are
    passed for both.
    """
    core = model.model if hasattr(model, "model") else model
    rig = core.head_pose.mhr
    first_param = next(rig.parameters())
    device, dtype = first_param.device, first_param.dtype

    batch = mhr_model_params.shape[0]
    params = torch.from_numpy(
        np.ascontiguousarray(mhr_model_params, dtype=np.float32)
    ).to(device=device, dtype=dtype)
    zero_shape = torch.zeros(batch, rig.NUM_IDENTITY, device=device, dtype=dtype)
    zero_expr = torch.zeros(batch, rig.NUM_EXPR, device=device, dtype=dtype)

    _, skel_state = rig(zero_shape, params, zero_expr, apply_correctives=False)
    return skel_state.cpu().float().numpy()  # (N, NJ, 8) cm
def rotmat_to_quat_np(R: np.ndarray) -> np.ndarray:
    """Convert (..., 3, 3) rotation matrices to (..., 4) xyzw quaternions.

    Branched Shepperd (1978) extraction: for each matrix the component that
    is recovered from a square root is w when the trace is positive,
    otherwise whichever of x/y/z has the largest diagonal entry. Computed in
    fp64, returned as float32. Cross-frame sign-fixing is the caller's job.
    """
    batch_shape = R.shape[:-2]
    mats = R.reshape(-1, 3, 3).astype(np.float64)
    quats = np.zeros((mats.shape[0], 4), dtype=np.float64)

    d0, d1, d2 = mats[:, 0, 0], mats[:, 1, 1], mats[:, 2, 2]
    trace = d0 + d1 + d2

    # Mutually exclusive branch masks that together cover every matrix.
    use_w = trace > 0
    use_x = ~use_w & (d0 > d1) & (d0 > d2)
    use_y = ~use_w & ~use_x & (d1 > d2)
    use_z = ~use_w & ~use_x & ~use_y

    if use_w.any():
        m = mats[use_w]
        four_w = np.sqrt(trace[use_w] + 1.0) * 2.0
        quats[use_w, 3] = 0.25 * four_w
        quats[use_w, 0] = (m[:, 2, 1] - m[:, 1, 2]) / four_w
        quats[use_w, 1] = (m[:, 0, 2] - m[:, 2, 0]) / four_w
        quats[use_w, 2] = (m[:, 1, 0] - m[:, 0, 1]) / four_w

    if use_x.any():
        m = mats[use_x]
        four_x = np.sqrt(1.0 + m[:, 0, 0] - m[:, 1, 1] - m[:, 2, 2]) * 2.0
        quats[use_x, 3] = (m[:, 2, 1] - m[:, 1, 2]) / four_x
        quats[use_x, 0] = 0.25 * four_x
        quats[use_x, 1] = (m[:, 0, 1] + m[:, 1, 0]) / four_x
        quats[use_x, 2] = (m[:, 0, 2] + m[:, 2, 0]) / four_x

    if use_y.any():
        m = mats[use_y]
        four_y = np.sqrt(1.0 + m[:, 1, 1] - m[:, 0, 0] - m[:, 2, 2]) * 2.0
        quats[use_y, 3] = (m[:, 0, 2] - m[:, 2, 0]) / four_y
        quats[use_y, 0] = (m[:, 0, 1] + m[:, 1, 0]) / four_y
        quats[use_y, 1] = 0.25 * four_y
        quats[use_y, 2] = (m[:, 1, 2] + m[:, 2, 1]) / four_y

    if use_z.any():
        m = mats[use_z]
        four_z = np.sqrt(1.0 + m[:, 2, 2] - m[:, 0, 0] - m[:, 1, 1]) * 2.0
        quats[use_z, 3] = (m[:, 1, 0] - m[:, 0, 1]) / four_z
        quats[use_z, 0] = (m[:, 0, 2] + m[:, 2, 0]) / four_z
        quats[use_z, 1] = (m[:, 1, 2] + m[:, 2, 1]) / four_z
        quats[use_z, 2] = 0.25 * four_z

    return quats.reshape(batch_shape + (4,)).astype(np.float32)
def global_skel_state_from_pose_data(
    pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
    NJ: int, *, joint_coords_y_down: bool = True,
) -> np.ndarray:
    """Assemble per-frame skel states from stored predictions, without rig FK.

    For every frame in `frame_indices`, person `person_k`'s first `NJ`
    `pred_global_rots` (rotation matrices) and `pred_joint_coords` are read.
    Returns (N, NJ, 8) float32 = [t | q xyzw | scale] in METERS, MHR-native
    frame, with scale fixed at 1 (rig scale isn't preserved in pose_data;
    close to 1 for typical body poses).

    pred_global_rots are MHR-native (no y/z flip). For MHR, pred_joint_coords
    are stored y-down (post-flip), so Y and Z are negated when
    `joint_coords_y_down` is True. External skeletons (Kimodo) already store
    y-up → pass False.
    """
    frames = pose_data["frames"]
    frame_count = len(frame_indices)
    rotations = np.zeros((frame_count, NJ, 3, 3), dtype=np.float32)
    positions = np.zeros((frame_count, NJ, 3), dtype=np.float32)
    for row, frame_id in enumerate(frame_indices):
        person = frames[frame_id][person_k]
        rotations[row] = np.asarray(person["pred_global_rots"], dtype=np.float32)[:NJ]
        positions[row] = np.asarray(person["pred_joint_coords"], dtype=np.float32)[:NJ]

    if joint_coords_y_down:
        positions[..., 1:3] *= -1.0

    state = np.zeros((frame_count, NJ, 8), dtype=np.float32)
    state[..., :3] = positions
    state[..., 3:7] = rotmat_to_quat_np(rotations)
    state[..., 7] = 1.0
    return state
def bind_skel_state(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> np.ndarray:
    """Bind-pose global skel state (NJ, 8) with translations in cm.

    MHR path: rig FK with all-zero params. Inverse of `lbs_inverse_bind_pose`
    modulo precision; used as the bones' static TRS so the rest mesh looks
    correct with no animation playing. External rig: a copy of the override's
    `bind_global_m` with its translation columns scaled m → cm to match this
    contract.
    """
    override = _get_skeleton_override(pose_data)
    if override is None:
        zero_params = np.zeros((1, 204), dtype=np.float32)
        return global_skel_state_per_frame(model, zero_params)[0]

    bind_cm = np.asarray(override["bind_global_m"], dtype=np.float32).copy()
    bind_cm[:, :3] *= 100.0  # only the translation part carries units
    return bind_cm
def ibp_from_bind_global(bind_skel_state_m: np.ndarray) -> np.ndarray:
    """Inverse-bind matrices from the rig's bind global skel state (meters).

    Each joint's forward bind matrix M = T * R * S (uniform scale) is built
    from its [t | q | s] row and inverted, which guarantees
    IBP[j] = inverse(FK over bind local TRS) — exactly what glTF skinning
    needs given bones default to the bind local TRS. Returns (NJ, 4, 4)
    float32, transposed so a C-order dump is column-major.
    """
    joint_count = bind_skel_state_m.shape[0]
    translation = bind_skel_state_m[:, :3].astype(np.float32)
    rotation = _quat_to_mat3_np(bind_skel_state_m[:, 3:7].astype(np.float32))
    scale = bind_skel_state_m[:, 7].astype(np.float32)

    # Forward bind: [s*R | t; 0 | 1]
    forward = np.zeros((joint_count, 4, 4), dtype=np.float32)
    forward[:, :3, :3] = rotation * scale[:, None, None]
    forward[:, :3, 3] = translation
    forward[:, 3, 3] = 1.0

    # Invert in fp64 for stability, then return to fp32.
    inverse = np.linalg.inv(forward.astype(np.float64)).astype(np.float32)
    # glTF MAT4 accessor is column-major.
    return inverse.transpose(0, 2, 1).astype(np.float32)
def _local_trs_per_frame(
    rig_static: Dict[str, np.ndarray], mhr_model_params: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Per-frame local joint TRS derived from model parameters.

    Returns (local_t[N, NJ, 3] in meters, local_q[N, NJ, 4] xyzw,
    local_s[N, NJ]), all float32, in the rig-native frame. Mirrors
    mhr_rig.forward without skinning: the parameters are zero-padded to the
    width of `param_transform`, mapped to 7 values per joint (translation,
    Euler xyz, log2 scale), then combined with the joint offsets and
    pre-rotations.
    """
    param_transform = rig_static["param_transform"]        # (889, 249) = (127*7, 204+45)
    offsets_cm = rig_static["joint_translation_offsets"]   # (127, 3) cm
    prerotations = rig_static["joint_prerotations"]        # (127, 4)
    joint_count = rig_static["num_joints"]
    frame_count = mhr_model_params.shape[0]

    padded = np.zeros((frame_count, param_transform.shape[1]), dtype=np.float32)
    padded[:, :mhr_model_params.shape[1]] = mhr_model_params.astype(np.float32)

    # joint_parameters[n, d] = sum_i param_transform[d, i] * padded[n, i]
    joint_params = (padded @ param_transform.T).reshape(frame_count, joint_count, 7)

    translation_cm = joint_params[..., :3] + offsets_cm[None]
    euler_quat = _euler_xyz_to_quat_np(joint_params[..., 3:6])
    rotation = _quat_multiply_np(prerotations[None], euler_quat)
    scale = np.exp(joint_params[..., 6] * _LN2)  # 2 ** log2_scale

    # rig-cm -> glTF-meters
    translation_m = (translation_cm * 0.01).astype(np.float32)
    return translation_m, rotation.astype(np.float32), scale.astype(np.float32)
def _ibp_to_mat4(ibp_skel: np.ndarray) -> np.ndarray:
    """(NJ, 8) inverse-bind skel state -> (NJ, 4, 4) float32 matrices.

    Translation is converted cm -> m, and each [s*R | t; 0 | 1] matrix is
    transposed so a C-order dump is column-major.
    """
    joint_count = ibp_skel.shape[0]
    translation_m = ibp_skel[:, :3] * 0.01  # cm -> m
    rotation = _quat_to_mat3_np(ibp_skel[:, 3:7])
    scale = ibp_skel[:, 7]

    matrices = np.zeros((joint_count, 4, 4), dtype=np.float32)
    matrices[:, :3, :3] = rotation * scale[:, None, None]
    matrices[:, :3, 3] = translation_m
    matrices[:, 3, 3] = 1.0
    return matrices.transpose(0, 2, 1).astype(np.float32)
def uv_sphere_unit(n_lat: int = 9, n_lon: int = 16) -> Tuple[np.ndarray, np.ndarray]:
    """Unit UV sphere with poles on ±Y.

    Layout: south pole at index 0, then `n_lat` latitude rings of `n_lon`
    vertices each (south to north), then the north pole. Returns
    (verts float32 (2 + n_lat * n_lon, 3), faces uint32 (2 * n_lat * n_lon, 3)).

    `n_lat` is kept ODD by default so one ring lands at the equator. The
    default (9, 16) gives 146 verts / 288 faces — n_lon matches the
    16-segment cylinder used by capsule limbs AND the equator ring aligns
    1-to-1 with the cylinder end ring, so silhouettes meet flush.
    """
    # Longitude directions are shared by every ring.
    ring_dirs = []
    for k in range(n_lon):
        phi = 2.0 * np.pi * k / n_lon
        ring_dirs.append((float(np.cos(phi)), float(np.sin(phi))))

    verts: List[List[float]] = [[0.0, -1.0, 0.0]]  # south pole at index 0
    for ring in range(1, n_lat + 1):
        lat = -0.5 * np.pi + np.pi * ring / (n_lat + 1)
        height = float(np.sin(lat))
        radius = float(np.cos(lat))
        verts.extend([radius * cos_phi, height, radius * sin_phi] for cos_phi, sin_phi in ring_dirs)
    north_pole = len(verts)
    verts.append([0.0, 1.0, 0.0])

    def ring_start(ring_index: int) -> int:
        return 1 + ring_index * n_lon

    faces: List[List[int]] = []

    # South cap — winding gives -Y outward normal.
    first = ring_start(0)
    for k in range(n_lon):
        faces.append([0, first + k, first + (k + 1) % n_lon])

    # Inter-ring quads, split into two outward-facing triangles.
    for ring_index in range(n_lat - 1):
        lower, upper = ring_start(ring_index), ring_start(ring_index + 1)
        for k in range(n_lon):
            k_next = (k + 1) % n_lon
            lower_here, lower_next = lower + k, lower + k_next
            upper_here, upper_next = upper + k, upper + k_next
            faces.append([lower_here, upper_next, lower_next])
            faces.append([lower_here, upper_here, upper_next])

    # North cap — winding gives +Y outward normal.
    last = ring_start(n_lat - 1)
    for k in range(n_lon):
        faces.append([north_pole, last + (k + 1) % n_lon, last + k])

    return (np.asarray(verts, dtype=np.float32),
            np.asarray(faces, dtype=np.uint32))
def flat_shade_mesh(
    verts: np.ndarray, faces: np.ndarray, joints: np.ndarray, weights: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Convert a smooth-shaded mesh to flat shading by un-sharing vertices.

    Every triangle gets three private vertices (output rows 3f, 3f+1, 3f+2)
    that all carry the face's unit normal; degenerate faces fall back to +Y.
    The 4-wide skin joints/weights are duplicated alongside. Returns
    (verts, normals, faces, joints, weights) where the new faces are simply
    0..3F-1 in order.
    """
    face_count = faces.shape[0]
    corner_ids = faces[:, :3]
    corners = verts[corner_ids]  # (F, 3 corners, 3 coords)

    face_normal = np.cross(corners[:, 1] - corners[:, 0], corners[:, 2] - corners[:, 0])
    normal_len = np.linalg.norm(face_normal, axis=1, keepdims=True)
    face_normal = np.where(
        normal_len > 1e-8,
        face_normal / np.maximum(normal_len, 1e-12),
        np.array([[0.0, 1.0, 0.0]]),
    )

    # Fill (F, 3, C) buffers, then flatten so corner k of face f is row 3f+k.
    flat_v = np.zeros((face_count, 3, 3), dtype=np.float32)
    flat_n = np.zeros((face_count, 3, 3), dtype=np.float32)
    flat_j = np.zeros((face_count, 3, 4), dtype=np.uint16)
    flat_w = np.zeros((face_count, 3, 4), dtype=np.float32)
    flat_v[...] = corners
    flat_n[...] = face_normal[:, None, :]
    flat_j[...] = joints[corner_ids]
    flat_w[...] = weights[corner_ids]

    flat_f = np.arange(face_count * 3, dtype=np.uint32).reshape(face_count, 3)
    return (
        flat_v.reshape(face_count * 3, 3),
        flat_n.reshape(face_count * 3, 3),
        flat_f,
        flat_j.reshape(face_count * 3, 4),
        flat_w.reshape(face_count * 3, 4),
    )
def smooth_shade_mesh(
    verts: np.ndarray, faces: np.ndarray, joints: np.ndarray, weights: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compute area-weighted per-vertex normals for smooth shading.

    The un-normalised face cross product (length proportional to triangle
    area) is summed onto each of the face's three vertices and the sum is
    normalised. Vertices whose accumulated normal has length <= 1e-8
    (e.g. not referenced by any face) receive +Y.

    Returns:
        (verts as float32, normals as float32, faces as uint32, joints,
        weights). `joints` and `weights` are the very objects passed in;
        vertex count and ordering are untouched.
    """
    vert_count = int(verts.shape[0])
    p0, p1, p2 = (verts[faces[:, corner]] for corner in range(3))
    face_normals = np.cross(p1 - p0, p2 - p0).astype(np.float32)

    accum = np.zeros((vert_count, 3), dtype=np.float32)
    # np.add.at (unbuffered) so a vertex shared by many faces gets every
    # contribution; corner 0, then 1, then 2.
    for corner in range(3):
        np.add.at(accum, faces[:, corner], face_normals)

    length = np.linalg.norm(accum, axis=1, keepdims=True)
    up = np.array([[0.0, 1.0, 0.0]], dtype=np.float32)
    normals = np.where(length > 1e-8, accum / np.maximum(length, 1e-12), up)

    return (
        verts.astype(np.float32),
        normals.astype(np.float32),
        faces.astype(np.uint32),
        joints,
        weights,
    )
def rotation_align(from_vec: np.ndarray, to_vec: np.ndarray) -> np.ndarray:
    """Return the 3x3 float32 rotation that maps unit `from_vec` onto unit `to_vec`.

    Both inputs are expected to be unit length. Three cases:

    * (nearly) parallel      -> identity;
    * (nearly) anti-parallel -> 180° turn about an axis perpendicular to
      `from_vec`. X is preferred as the seed for that axis, so inputs along
      ±Y or ±Z yield diag(1, -1, -1);
    * otherwise              -> Rodrigues' formula about `from × to`.

    Args:
        from_vec: unit 3-vector to rotate.
        to_vec: unit 3-vector to rotate onto.

    Returns:
        (3, 3) float32 rotation matrix R with R @ from_vec ≈ to_vec.
    """
    cos_t = float(np.dot(from_vec, to_vec))
    cross = np.cross(from_vec, to_vec)
    sin_t = float(np.linalg.norm(cross))
    if sin_t < 1e-8:
        if cos_t > 0:
            return np.eye(3, dtype=np.float32)
        # Anti-aligned: a half-turn about ANY axis perpendicular to from_vec
        # works. A fixed X axis is only valid when from_vec has no X component
        # (it would leave e.g. +X unchanged), so derive the axis from from_vec.
        src = np.asarray(from_vec, dtype=np.float64).reshape(3)
        src_len = float(np.linalg.norm(src))
        perp = np.array([1.0, 0.0, 0.0])
        if src_len > 1e-8:
            src = src / src_len
            # Seed with X unless from_vec is itself close to X, then remove
            # the component along from_vec (one Gram-Schmidt step).
            seed = perp if abs(src[0]) < 0.9 else np.array([0.0, 1.0, 0.0])
            perp = seed - float(np.dot(seed, src)) * src
            perp = perp / np.linalg.norm(perp)
        # Half-turn about unit axis a is 2·a·aᵀ − I.
        return (2.0 * np.outer(perp, perp) - np.eye(3)).astype(np.float32)
    axis = cross / sin_t
    # Skew-symmetric cross-product matrix of the rotation axis.
    K = np.array([
        [0.0, -axis[2], axis[1]],
        [axis[2], 0.0, -axis[0]],
        [-axis[1], axis[0], 0.0],
    ], dtype=np.float32)
    return (np.eye(3, dtype=np.float32) + sin_t * K + (1.0 - cos_t) * (K @ K)).astype(np.float32)
def make_lit_material(
    roughness: float = 0.85, double_sided: bool = False, opacity: float = 1.0,
) -> dict:
    """Build a lit glTF PBR material dict that multiplies in vertex COLOR_0.

    KHR_materials_unlit is deliberately not used, so viewer lighting still
    shows surface form. metallicFactor is fixed at 0 (dielectric) to keep
    vertex colors readable. A roughness of 0.85 suits dense rainbow body
    meshes; 0.3 matches SCAIL-Pose's glossy rig look.

    Args:
        roughness: roughnessFactor, clamped to [0, 1].
        double_sided: when true, adds `"doubleSided": True`.
        opacity: base-color alpha, clamped to [0, 1]; any value below 1
            switches the material to `"alphaMode": "BLEND"` (e.g. a
            see-through body mesh over bones).

    Returns:
        A new material dictionary.
    """
    alpha = float(max(0.0, min(1.0, opacity)))
    rough = float(max(0.0, min(1.0, roughness)))

    pbr = {
        "baseColorFactor": [1.0, 1.0, 1.0, alpha],
        "metallicFactor": 0.0,
        "roughnessFactor": rough,
    }
    material = {"pbrMetallicRoughness": pbr}

    # Optional keys are only emitted when they differ from the glTF defaults.
    translucent = alpha < 1.0
    if translucent:
        material["alphaMode"] = "BLEND"
    if double_sided:
        material["doubleSided"] = True
    return material
# OpenPose 18-keypoint viz (independent of MHR rig — uses pred_keypoints_3d,
# the model's regressed surface keypoints).
# Tuple position is the OpenPose-18 keypoint index (0 = Nose … 17 = LEar); the
# same index→name pairing is spelled out in the comments of
# `OPENPOSE18_TO_MHR70` below.
OPENPOSE_18_NAMES = (
    "Nose", "Neck", "RShoulder", "RElbow", "RWrist",
    "LShoulder", "LElbow", "LWrist", "RHip", "RKnee",
    "RAnkle", "LHip", "LKnee", "LAnkle", "REye",
    "LEye", "REar", "LEar",
)
# COCO-18 OpenPose -> MHR70. Subset of `MHR70_TO_OPENPOSE` in
# comfy/ldm/sam3d/mhr70.py (no toes/heels).
# Entry i is the MHR70 keypoint index to read for OpenPose-18 keypoint i
# (trailing comment on each row: OpenPose index and name).
OPENPOSE18_TO_MHR70 = np.array([
    0,   # 0  Nose
    69,  # 1  Neck
    6,   # 2  RShoulder
    8,   # 3  RElbow
    41,  # 4  RWrist
    5,   # 5  LShoulder
    7,   # 6  LElbow
    62,  # 7  LWrist
    10,  # 8  RHip
    12,  # 9  RKnee
    14,  # 10 RAnkle
    9,   # 11 LHip
    11,  # 12 LKnee
    13,  # 13 LAnkle
    2,   # 14 REye
    1,   # 15 LEye
    4,   # 16 REar
    3,   # 17 LEar
], dtype=np.int64)
# OpenPose limb pairs + rainbow palette delegate to the canonical DWPose tables
# carried by `comfy_extras.pose.keypoint_draw.KeypointDraw` (also used by nodes_sdpose).
# `body_limbSeq` is 1-indexed there; we use 0-indexed throughout this module.
# NOTE(review): this import executes mid-module rather than with the
# top-of-file imports — confirm whether the placement is deliberate (e.g.
# import-order constraints) before relocating it.
from comfy_extras.pose.keypoint_draw import KeypointDraw as _KeypointDraw
# Single shared instance; this section reads its `body_limbSeq`, `colors` and
# `hand_edges` tables.
_KD = _KeypointDraw()
# Every (a, b) limb pair shifted down by one -> 0-based keypoint indices.
OPENPOSE_18_PAIRS = tuple((a - 1, b - 1) for a, b in _KD.body_limbSeq)
# Palette as float32 divided by 255 (presumably 8-bit RGB -> 0..1; verify
# against KeypointDraw.colors).
OPENPOSE_RAINBOW_18 = (np.array(_KD.colors, dtype=np.float32) / 255.0)
# SCAIL-Pose limb palette (17 limbs in `OPENPOSE_18_PAIRS` order): warm =
# right side, cool = left, grey centerline, pink/violet face. Matches
# ComfyUI-SCAIL-Pose's `nlf_render.py::ordered_colors_255`.
# Stored as float32 RGB in 0..1 (the 8-bit rows below divided by 255). Row i
# colors limb i; the trailing comment names the limb's two endpoints.
SCAIL_LIMB_COLORS_17 = (np.array([
    [255,   0,   0],  # 0  Neck → R.Shoulder (Red)
    [  0, 255, 255],  # 1  Neck → L.Shoulder (Cyan)
    [255,  85,   0],  # 2  R.Shoulder → R.Elbow (Orange)
    [255, 170,   0],  # 3  R.Elbow → R.Wrist (Golden Orange)
    [  0, 170, 255],  # 4  L.Shoulder → L.Elbow (Sky Blue)
    [  0,  85, 255],  # 5  L.Elbow → L.Wrist (Medium Blue)
    [180, 255,   0],  # 6  Neck → R.Hip (Yellow-Green)
    [  0, 255,   0],  # 7  R.Hip → R.Knee (Bright Green)
    [  0, 255,  85],  # 8  R.Knee → R.Ankle (Light Green-Blue)
    [  0,   0, 255],  # 9  Neck → L.Hip (Pure Blue)
    [ 85,   0, 255],  # 10 L.Hip → L.Knee (Purple-Blue)
    [170,   0, 255],  # 11 L.Knee → L.Ankle (Medium Purple)
    [150, 150, 150],  # 12 Neck → Nose (Grey)
    [255,   0, 170],  # 13 Nose → R.Eye (Pink-Magenta)
    [ 50,   0, 255],  # 14 R.Eye → R.Ear (Dark Violet)
    [255,   0, 170],  # 15 Nose → L.Eye (Pink-Magenta)
    [ 50,   0, 255],  # 16 L.Eye → L.Ear (Dark Violet)
], dtype=np.float32) / 255.0)
def _scail_keypoint_colors_18(
    limb_pairs: Optional[Tuple[Tuple[int, int], ...]] = None,
) -> np.ndarray:
    """Derive 18 keypoint colors from the 17 SCAIL limb colors.

    Each keypoint takes the color of the first limb (in `limb_pairs` order)
    whose second endpoint it is. Keypoints that never appear as a second
    endpoint keep the mid-grey fill (0.6, 0.6, 0.6).

    Args:
        limb_pairs: 0-indexed (start, end) keypoint pairs; limb i is colored
            by `SCAIL_LIMB_COLORS_17[i]`. Defaults to `OPENPOSE_18_PAIRS`.

    Returns:
        (18, 3) float32 RGB array.
    """
    pairs = OPENPOSE_18_PAIRS if limb_pairs is None else limb_pairs
    out = np.tile(np.array([0.6, 0.6, 0.6], dtype=np.float32), (18, 1))
    # Track assignment explicitly instead of comparing against the grey fill:
    # a limb color that happens to equal 0.6 grey (153/255) must still count
    # as assigned and not be overwritten by a later limb.
    assigned = np.zeros(18, dtype=bool)
    for limb_i, (_, b) in enumerate(pairs):
        if not assigned[b]:
            out[b] = SCAIL_LIMB_COLORS_17[limb_i]
            assigned[b] = True
    return out
# Per-keypoint SCAIL colors for the default OpenPose-18 limb list, computed
# once at import time.
SCAIL_KEYPOINT_COLORS_18 = _scail_keypoint_colors_18()
# OpenPose hand: 21 kp per hand = wrist + 5 fingers × 4 joints (proximal→distal).
# MHR70 stores fingers as (tip, joint1, joint2, joint3=MCP) so we reverse each
# 4-tuple. See comfy/ldm/sam3d/mhr70.py.
OPENPOSE_HAND21_NAMES = (
    "wrist",
    "thumb1", "thumb2", "thumb3", "thumb4",
    "index1", "index2", "index3", "index4",
    "middle1", "middle2", "middle3", "middle4",
    "ring1", "ring2", "ring3", "ring4",
    "pinky1", "pinky2", "pinky3", "pinky4",
)
# Right hand: entry i is the MHR70 index for hand keypoint i. Each finger row
# lists four consecutive MHR70 indices in descending order (the reversal
# described above).
OPENPOSE_HAND21_TO_MHR70_R = np.array([
    41,              # 0 right_wrist
    24, 23, 22, 21,  # thumb base→tip
    28, 27, 26, 25,  # index
    32, 31, 30, 29,  # middle
    36, 35, 34, 33,  # ring
    40, 39, 38, 37,  # pinky
], dtype=np.int64)
# Left hand: same layout as the right-hand table, using the left-hand MHR70
# index range.
OPENPOSE_HAND21_TO_MHR70_L = np.array([
    62,              # 0 left_wrist
    45, 44, 43, 42,  # thumb base→tip
    49, 48, 47, 46,  # index
    53, 52, 51, 50,  # middle
    57, 56, 55, 54,  # ring
    61, 60, 59, 58,  # pinky
], dtype=np.int64)
# OpenPose hand limbs: 5 chains × 4 bones, delegated to KeypointDraw.hand_edges.
# Each edge is converted to a tuple so the whole table is immutable.
OPENPOSE_HAND_PAIRS = tuple(tuple(e) for e in _KD.hand_edges)
# OpenPose hand colors (poseParameters.cpp::HAND_COLORS_RENDER): wrist grey,
# then per-finger base→tip gradient red/yellow/green/cyan/magenta.
# One row per hand keypoint (same order as `OPENPOSE_HAND21_NAMES`), stored as
# float32 RGB in 0..1 (8-bit rows divided by 255).
OPENPOSE_HAND_COLORS_21 = (np.array([
    [100, 100, 100],
    [100,   0,   0], [150,   0,   0], [200,   0,   0], [255,   0,   0],
    [100, 100,   0], [150, 150,   0], [200, 200,   0], [255, 255,   0],
    [  0, 100,  50], [  0, 150,  75], [  0, 200, 100], [  0, 255, 125],
    [  0, 100, 100], [  0, 150, 150], [  0, 200, 200], [  0, 255, 255],
    [100,   0, 100], [150,   0, 150], [200,   0, 200], [255,   0, 255],
], dtype=np.float32) / 255.0)
# DWPose: solid blue hand dots, rainbow per-finger bones (matches
# controlnet_aux/dwpose/util.py::draw_handpose).
# Only the dot colors live here: 21 identical (0, 0, 1) float32 rows.
DWPOSE_HAND_COLORS_21 = np.tile(
    np.array([[0.0, 0.0, 1.0]], dtype=np.float32), (21, 1)
)
# Face landmarks from the MHR rig (option `face_source="rig"`).
# MHR has no face bones — face deforms via expr_params morphs — so landmarks
# are sourced from `pred_vertices` at fixed vertex IDs picked by NN against
# anatomically-plausible target xyz in canonical Y-up. Iterate visually in
# Blender and tweak targets if landmarks land off-surface.
# (name, target_xyz) in MHR canonical Y-up meters.
# 30 landmarks in total. In this table `r_*` targets have negative X and
# `l_*` targets positive X; midline targets sit at X = 0.
FACE_LANDMARK_TARGETS: Tuple[Tuple[str, Tuple[float, float, float]], ...] = (
    # Brows — 3 per side, outer→inner
    ("r_brow_outer", (-0.058, 1.690, 0.090)),
    ("r_brow_mid", (-0.040, 1.695, 0.105)),
    ("r_brow_inner", (-0.020, 1.692, 0.115)),
    ("l_brow_inner", (+0.020, 1.692, 0.115)),
    ("l_brow_mid", (+0.040, 1.695, 0.105)),
    ("l_brow_outer", (+0.058, 1.690, 0.090)),
    # Right eye — outer/top/inner/bottom
    ("r_eye_outer", (-0.058, 1.660, 0.085)),
    ("r_eye_top", (-0.040, 1.673, 0.090)),
    ("r_eye_inner", (-0.022, 1.665, 0.092)),
    ("r_eye_bot", (-0.040, 1.652, 0.090)),
    # Left eye
    ("l_eye_outer", (+0.058, 1.660, 0.085)),
    ("l_eye_top", (+0.040, 1.673, 0.090)),
    ("l_eye_inner", (+0.022, 1.665, 0.092)),
    ("l_eye_bot", (+0.040, 1.652, 0.090)),
    # Nose
    ("nose_bridge", (0.000, 1.660, 0.110)),
    ("nose_mid", (0.000, 1.620, 0.125)),
    ("nose_tip", (0.000, 1.585, 0.135)),
    ("nostril_r", (-0.014, 1.580, 0.115)),
    ("nostril_l", (+0.014, 1.580, 0.115)),
    # Mouth — 4 outer-lip points
    ("mouth_r_corner", (-0.030, 1.540, 0.105)),
    ("upper_lip_mid", (+0.000, 1.555, 0.115)),
    ("mouth_l_corner", (+0.030, 1.540, 0.105)),
    ("lower_lip_mid", (+0.000, 1.530, 0.110)),
    # Chin + jaw line — Y raised so NN search lands on chin tip / jaw underside
    # (above the jaw-neck boundary at y~1.47) instead of throat verts.
    ("chin", (0.000, 1.498, 0.108)),
    ("r_jaw_low", (-0.038, 1.512, 0.100)),
    ("r_jaw_mid", (-0.062, 1.535, 0.080)),
    ("r_jaw_high", (-0.078, 1.562, 0.060)),
    ("l_jaw_low", (+0.038, 1.512, 0.100)),
    ("l_jaw_mid", (+0.062, 1.535, 0.080)),
    ("l_jaw_high", (+0.078, 1.562, 0.060)),
)
# Face landmarks are drawn solid white: same as DWPose, and easy to tell apart
# from the rainbow body palette.
def _face_landmark_colors() -> np.ndarray:
    """Return one white float32 RGB row per entry of `FACE_LANDMARK_TARGETS`."""
    landmark_count = len(FACE_LANDMARK_TARGETS)
    return np.ones((landmark_count, 3), dtype=np.float32)


FACE_LANDMARK_COLORS: np.ndarray = _face_landmark_colors()
def select_face_landmark_vert_ids(
    canonical_positions: np.ndarray,
    face_mask: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Choose one mesh vertex ID per `FACE_LANDMARK_TARGETS` entry.

    Each target is matched to its nearest neighbour (squared Euclidean
    distance) among a candidate subset of `canonical_positions`:

    * if `face_mask` is given and has at least one truthy entry, the
      candidates are the vertices it marks (intended to be the verts that
      deform with any of the 72 expression axes, which keeps chin/jaw
      searches off the neck);
    * otherwise a positional bounding box is used (y > 1.47, |x| < 0.11,
      z > 0.04) — less reliable, throat verts can pull the chin targets.

    Args:
        canonical_positions: vertex positions, reshaped to (-1, 3) float32.
        face_mask: optional per-vertex mask, flattened before use.

    Returns:
        int64 array of vertex indices into `canonical_positions`, one per
        landmark, in `FACE_LANDMARK_TARGETS` order.

    Raises:
        ValueError: if no vertex passes the candidate filter.
    """
    points = np.asarray(canonical_positions, dtype=np.float32).reshape(-1, 3)

    has_mask = face_mask is not None and np.asarray(face_mask).any()
    if has_mask:
        keep = np.asarray(face_mask).reshape(-1)
    else:
        above_neck = points[:, 1] > 1.47
        near_midline = np.abs(points[:, 0]) < 0.11
        in_front = points[:, 2] > 0.04
        keep = above_neck & near_midline & in_front
    candidate_ids = np.where(keep)[0]

    # Only the bbox fallback can come back empty; a usable mask has at least
    # one truthy entry by construction.
    if candidate_ids.size == 0:
        raise ValueError(
            "select_face_landmark_vert_ids: no head verts matched the "
            "canonical filter — check that pose_data.canonical_colors "
            "holds the MHR rest-pose positions / face_mask."
        )

    candidates = points[candidate_ids]
    picked = np.empty(len(FACE_LANDMARK_TARGETS), dtype=np.int64)
    for slot, (_name, xyz) in enumerate(FACE_LANDMARK_TARGETS):
        offsets = candidates - np.asarray(xyz, dtype=np.float32)
        nearest = int(np.sum(offsets ** 2, axis=1).argmin())
        # Map the position within the candidate subset back to a mesh index.
        picked[slot] = int(candidate_ids[nearest])
    return picked