"""GLB export for SAM 3D Body pose_data. Mode: skeletal — rebuilds the MHR 127-bone rig. Per-frame local TRS comes from re-running param_transform on saved mhr_model_params; rest verts from a zero-pose forward with the person's shape_params; sparse triplet skinning is compacted to glTF's max-4-influences form; facial expression is re-exposed as 72 morph targets driven by expr_params. pred_vertices/pred_cam_t are camera-y-down — un-flipped here so the GLB lives in glTF-spec Y-up. Pose correctives are dropped (glTF skinning can't represent them); deformation at extreme joint angles will differ from the SAM3DBody renderer by the corrective amount. """ from __future__ import annotations import json import struct from typing import Any, Dict, List, Optional, Tuple import numpy as np import torch from comfy_extras.sam3d_body.rasterizer import rainbow_colors_from_canonical # fp32-rounded ln(2). Used as `exp(x * _LN2)` to compute 2**x bit-identically # to the rig's own `torch.exp(jp[..., 6:7] * _LN2)` _LN2 = 0.6931471824645996 # Quaternion / rotation helpers (xyzw convention, matching MHR rig) def _euler_xyz_to_quat_np(angles: np.ndarray) -> np.ndarray: """(roll, pitch, yaw) -> (x, y, z, w). Mirrors mhr_rig._euler_xyz_to_quat.""" roll, pitch, yaw = angles[..., 0], angles[..., 1], angles[..., 2] cy, sy = np.cos(yaw * 0.5), np.sin(yaw * 0.5) cp, sp = np.cos(pitch * 0.5), np.sin(pitch * 0.5) cr, sr = np.cos(roll * 0.5), np.sin(roll * 0.5) x = sr * cp * cy - cr * sp * sy y = cr * sp * cy + sr * cp * sy z = cr * cp * sy - sr * sp * cy w = cr * cp * cy + sr * sp * sy return np.stack([x, y, z, w], axis=-1) def _quat_multiply_np(q1: np.ndarray, q2: np.ndarray) -> np.ndarray: """xyzw product. 
Mirrors mhr_rig._quat_multiply.""" x1, y1, z1, w1 = q1[..., 0], q1[..., 1], q1[..., 2], q1[..., 3] x2, y2, z2, w2 = q2[..., 0], q2[..., 1], q2[..., 2], q2[..., 3] x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 return np.stack([x, y, z, w], axis=-1) def _quat_rotate_np(q: np.ndarray, v: np.ndarray) -> np.ndarray: """Rotate v by unit xyzw q. Mirrors mhr_rig._quat_rotate.""" axis = q[..., :3] r = q[..., 3:4] av = np.cross(axis, v, axis=-1) aav = np.cross(axis, av, axis=-1) return v + 2.0 * (av * r + aav) def _skel_state_inverse_np(skel_state: np.ndarray) -> np.ndarray: """Inverse of (t, q, s). Normalizes q first so non-unit input is OK.""" t = skel_state[..., :3] q = skel_state[..., 3:7] s = skel_state[..., 7:8] q = q / np.maximum(np.linalg.norm(q, axis=-1, keepdims=True), 1e-12) s_safe = np.where(np.abs(s) > 1e-12, s, 1.0) s_inv = 1.0 / s_safe q_inv = np.concatenate([-q[..., :3], q[..., 3:4]], axis=-1) t_inv = -s_inv * _quat_rotate_np(q_inv, t) return np.concatenate([t_inv, q_inv, s_inv], axis=-1) def _skel_state_compose_np(s1: np.ndarray, s2: np.ndarray) -> np.ndarray: """s1 ∘ s2. Mirrors mhr_rig._skel_multiply.""" t1 = s1[..., :3] q1 = s1[..., 3:7] sc1 = s1[..., 7:8] t2 = s2[..., :3] q2 = s2[..., 3:7] sc2 = s2[..., 7:8] # Defensive normalization to match the rig's `F.normalize` calls. q1 = q1 / np.maximum(np.linalg.norm(q1, axis=-1, keepdims=True), 1e-12) q2 = q2 / np.maximum(np.linalg.norm(q2, axis=-1, keepdims=True), 1e-12) t_res = t1 + sc1 * _quat_rotate_np(q1, t2) q_res = _quat_multiply_np(q1, q2) s_res = sc1 * sc2 return np.concatenate([t_res, q_res, s_res], axis=-1) def _gaussian_smooth_time(arr: np.ndarray, window: int) -> np.ndarray: """Edge-replicate Gaussian smoothing along axis 0 (time); sigma = window/4. Endpoints replicate so they aren't pulled toward zero. 
Returns float64.""" a = np.asarray(arr, dtype=np.float64) n = a.shape[0] half = window // 2 sigma = max(0.5, window / 4.0) x = np.arange(-half, half + 1, dtype=np.float64) kernel = np.exp(-x * x / (2.0 * sigma * sigma)) kernel = kernel / kernel.sum() padded = np.concatenate([ np.broadcast_to(a[:1], (half,) + a.shape[1:]), a, np.broadcast_to(a[-1:], (half,) + a.shape[1:]), ], axis=0) out = np.zeros_like(a) for k, w in enumerate(kernel): out += w * padded[k:k + n] return out def gaussian_smooth_quats(q_seq: np.ndarray, window: int) -> np.ndarray: """Gaussian-smooth a (N, NJ, 4) quaternion sequence along time. Sign-aligns per joint first, convolves per-component, renormalizes. Suppresses multi- frame bone spikes at extreme poses without needing the upstream Smooth node.""" if window <= 1 or q_seq.shape[0] < 2: return q_seq out = _gaussian_smooth_time(quat_sign_fix_per_joint(q_seq), window) norms = np.linalg.norm(out, axis=-1, keepdims=True) return (out / np.maximum(norms, 1e-12)).astype(np.float32) def gaussian_smooth_positions(seq: np.ndarray, window: int) -> np.ndarray: """Gaussian-smooth a (N, K, 3) position sequence along time (edge-replicate padding). Used to calm jittery keypoint tracks before the openpose rig derives sphere translations + limb TRS from them.""" if window <= 1 or seq.shape[0] < 2: return seq return _gaussian_smooth_time(seq, window).astype(np.float32) def quat_sign_fix_per_joint(q_seq: np.ndarray) -> np.ndarray: """Walk (N, NJ, 4) along time, flip sign whenever consecutive frames sit on opposite hemispheres. Eliminates long-path slerp glitches (mid-anim cartwheel flip). 
fp64 to avoid drift; normalizes input defensively.""" out = np.array(q_seq, dtype=np.float64, copy=True) norms = np.linalg.norm(out, axis=-1, keepdims=True) out = out / np.maximum(norms, 1e-12) for t in range(1, out.shape[0]): dots = (out[t - 1] * out[t]).sum(axis=-1) sign = np.where(dots < 0.0, -1.0, 1.0)[:, None] out[t] = out[t] * sign return out.astype(np.float32) def bone_locals_from_globals(rig_global: np.ndarray, parents: np.ndarray) -> np.ndarray: """Globals (N, NJ, 8) + parents -> per-bone local TRS (N, NJ, 8) such that FK over (parents, bone_local) reproduces rig_global. local = inverse(parent_global) ∘ child_global makes this robust to hierarchy- convention mismatches: glTF FK gives back exactly rig_global even if `parents` doesn't match the rig's pmi-walk.""" N, NJ, _ = rig_global.shape bone_local = np.zeros_like(rig_global) for j in range(NJ): p = int(parents[j]) if 0 <= p < NJ and p != j: parent_g = rig_global[:, p] parent_g_inv = _skel_state_inverse_np(parent_g) bone_local[:, j] = _skel_state_compose_np(parent_g_inv, rig_global[:, j]) else: bone_local[:, j] = rig_global[:, j] return bone_local def _quat_to_mat3_np(q: np.ndarray) -> np.ndarray: x, y, z, w = q[..., 0], q[..., 1], q[..., 2], q[..., 3] n = x * x + y * y + z * z + w * w s = np.where(n > 0, 2.0 / n, 0.0) R = np.empty(q.shape[:-1] + (3, 3), dtype=q.dtype) R[..., 0, 0] = 1 - s * (y * y + z * z) R[..., 0, 1] = s * (x * y - z * w) R[..., 0, 2] = s * (x * z + y * w) R[..., 1, 0] = s * (x * y + z * w) R[..., 1, 1] = 1 - s * (x * x + z * z) R[..., 1, 2] = s * (y * z - x * w) R[..., 2, 0] = s * (x * z - y * w) R[..., 2, 1] = s * (y * z + x * w) R[..., 2, 2] = 1 - s * (x * x + y * y) return R def collect_tracks(pose_data: Dict[str, Any], track_index: int) -> List[Tuple[int, List[int]]]: """List of (person_index, frame_indices). track_index == -1 means every present track; empty tracks are dropped. 
Same person index across frames is assumed same subject (Smooth/Predict enforce this on tracked bboxes).""" frames = pose_data["frames"] max_p = max((len(f) for f in frames), default=0) if max_p == 0: return [] if track_index >= 0: if track_index >= max_p: return [] wanted = [track_index] else: wanted = list(range(max_p)) tracks: List[Tuple[int, List[int]]] = [] for k in wanted: valid = [t for t, fr in enumerate(frames) if k < len(fr)] if valid: tracks.append((k, valid)) return tracks # glTF binary builder _FLOAT = 5126 _USHORT = 5123 _UINT = 5125 _BYTE_ARRAY = 34962 _BYTE_ELEMENT = 34963 def _pad4(buf: bytes, fill: bytes = b"\x00") -> bytes: n = (4 - (len(buf) % 4)) % 4 return buf + fill * n class GLBWriter: """Builds a single .glb from incremental accessor/bufferView additions.""" def __init__(self) -> None: self._buffer = bytearray() self.bufferViews: List[dict] = [] self.accessors: List[dict] = [] def _add_view(self, data: bytes, *, target: Optional[int] = None) -> int: offset = len(self._buffer) self._buffer += data # 4-byte align so subsequent views start on a boundary. 
pad = (4 - (offset + len(data)) % 4) % 4 if pad: self._buffer += b"\x00" * pad view = {"buffer": 0, "byteOffset": offset, "byteLength": len(data)} if target is not None: view["target"] = target self.bufferViews.append(view) return len(self.bufferViews) - 1 def add_vec3_f32(self, arr: np.ndarray, *, target: int = _BYTE_ARRAY) -> int: a = np.ascontiguousarray(arr, dtype=np.float32) view_idx = self._add_view(a.tobytes(), target=target) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": a.shape[0], "type": "VEC3", "min": a.min(axis=0).tolist() if a.shape[0] else [0.0, 0.0, 0.0], "max": a.max(axis=0).tolist() if a.shape[0] else [0.0, 0.0, 0.0], }) return len(self.accessors) - 1 def add_vec3_f32_no_minmax(self, arr: np.ndarray) -> int: """Morph-target POSITIONs: spec lets us skip min/max, avoiding a per-frame delta bbox.""" a = np.ascontiguousarray(arr, dtype=np.float32) view_idx = self._add_view(a.tobytes(), target=_BYTE_ARRAY) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": a.shape[0], "type": "VEC3", }) return len(self.accessors) - 1 def add_indices_u32(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.uint32).reshape(-1) view_idx = self._add_view(a.tobytes(), target=_BYTE_ELEMENT) self.accessors.append({ "bufferView": view_idx, "componentType": _UINT, "count": int(a.size), "type": "SCALAR", }) return len(self.accessors) - 1 def add_scalar_f32(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.float32).reshape(-1) view_idx = self._add_view(a.tobytes()) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": int(a.size), "type": "SCALAR", "min": [float(a.min())] if a.size else [0.0], "max": [float(a.max())] if a.size else [0.0], }) return len(self.accessors) - 1 def add_scalar_f32_flat(self, arr: np.ndarray, count: int) -> int: """Animation-output scalars: `count` is keyframes, not floats. 
Morph- target weight tracks store N_morph weights per keyframe as flat float32 with count=N_keyframes.""" a = np.ascontiguousarray(arr, dtype=np.float32).reshape(-1) view_idx = self._add_view(a.tobytes()) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": int(count), "type": "SCALAR", }) return len(self.accessors) - 1 def add_vec3_f32_anim(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.float32) view_idx = self._add_view(a.tobytes()) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": a.shape[0], "type": "VEC3", }) return len(self.accessors) - 1 def add_vec4_f32(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.float32) view_idx = self._add_view(a.tobytes()) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": a.shape[0], "type": "VEC4", }) return len(self.accessors) - 1 def add_mat4_f32(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.float32) view_idx = self._add_view(a.tobytes()) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": a.shape[0], "type": "MAT4", }) return len(self.accessors) - 1 def add_joints_u16(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.uint16) view_idx = self._add_view(a.tobytes(), target=_BYTE_ARRAY) self.accessors.append({ "bufferView": view_idx, "componentType": _USHORT, "count": a.shape[0], "type": "VEC4", }) return len(self.accessors) - 1 def add_weights_f32(self, arr: np.ndarray) -> int: a = np.ascontiguousarray(arr, dtype=np.float32) view_idx = self._add_view(a.tobytes(), target=_BYTE_ARRAY) self.accessors.append({ "bufferView": view_idx, "componentType": _FLOAT, "count": a.shape[0], "type": "VEC4", }) return len(self.accessors) - 1 def to_bytes(self, gltf: dict) -> bytes: gltf["buffers"] = [{"byteLength": len(self._buffer)}] gltf["bufferViews"] = self.bufferViews gltf["accessors"] = self.accessors json_bytes = json.dumps(gltf, 
separators=(",", ":")).encode("utf-8") json_padded = _pad4(json_bytes, fill=b" ") bin_padded = _pad4(bytes(self._buffer)) total = 12 + 8 + len(json_padded) + 8 + len(bin_padded) header = struct.pack("<4sII", b"glTF", 2, total) json_chunk = struct.pack(" np.ndarray: out = np.array(arr, dtype=np.float32, copy=True) out[..., 1] *= -1.0 out[..., 2] *= -1.0 return out _BAKEABLE_SHADERS = { "default", "rainbow", "rainbow_face_normal", "rainbow_face_semantic", } def bake_vertex_colors( canonical_colors: Optional[Dict[str, np.ndarray]], shader: str, rainbow_tilt_x_deg: float, rainbow_tilt_z_deg: float, pastel_mix: float, ) -> Optional[np.ndarray]: """Per-vertex RGB matching the renderer's shader preset, on the canonical mesh. Returns (N_v, 3) float32 in [0, 1], or None for `default` (let the viewer's default material handle shading).""" if shader == "default" or canonical_colors is None: return None positions = np.asarray(canonical_colors["positions"], dtype=np.float32) vcolor = rainbow_colors_from_canonical( positions, tilt_x_deg=rainbow_tilt_x_deg, tilt_z_deg=rainbow_tilt_z_deg, ).copy() if shader in ("rainbow_face_normal", "rainbow_face_semantic"): face_mask = canonical_colors.get("face_mask") if face_mask is not None and np.asarray(face_mask).any(): if shader == "rainbow_face_normal": norm = np.asarray(canonical_colors["norm"], dtype=np.float32) vcolor[face_mask] = norm[face_mask] else: # rainbow_face_semantic sem = np.asarray(canonical_colors["face_region_rgb"], dtype=np.float32) assigned = sem.sum(axis=1) > 0 vcolor[assigned] = sem[assigned] # SCAIL-style per-person pastel mix toward white (track 0 = full color). 
pm = max(0.0, min(1.0, float(pastel_mix))) if pm > 0: vcolor = vcolor * (1.0 - pm) + pm return np.clip(vcolor, 0.0, 1.0).astype(np.float32) def compute_pastel_mix(track_i: int, falloff: float) -> float: """SCAIL-style desaturation: track 0 = 0.0, track k = 1 - falloff^k.""" f = max(0.1, min(1.0, float(falloff))) return 0.0 if track_i == 0 else (1.0 - f ** track_i) def compute_normals(verts: np.ndarray, faces: np.ndarray) -> np.ndarray: v0 = verts[faces[:, 0]] v1 = verts[faces[:, 1]] v2 = verts[faces[:, 2]] fn = np.cross(v1 - v0, v2 - v0).astype(np.float32) vn = np.zeros_like(verts, dtype=np.float32) np.add.at(vn, faces[:, 0], fn) np.add.at(vn, faces[:, 1], fn) np.add.at(vn, faces[:, 2], fn) ln = np.linalg.norm(vn, axis=1, keepdims=True) ln[ln < 1e-8] = 1.0 return (vn / ln).astype(np.float32) def _parents_from_pmi(rig: Any) -> np.ndarray: """Parent index per joint from skel_pmi. pmi is (2, 266): row 0 = child, row 1 = parent, split into BFS levels by skel_pmi_buffer_sizes. Roots = -1.""" NJ = int(rig.NUM_JOINTS) pmi = rig.skel_pmi.cpu().numpy() sizes = rig.skel_pmi_buffer_sizes.cpu().numpy().tolist() parents = np.full(NJ, -1, dtype=np.int32) offset = 0 for sz in sizes: if sz > 0: src = pmi[0, offset:offset + sz].astype(np.int64) tgt = pmi[1, offset:offset + sz].astype(np.int64) parents[src] = tgt offset += sz return parents def _get_skeleton_override(pose_data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Return ``_skeleton_override`` dict if present. Non-MHR skeletons supply this to bypass MHR rig extraction (see ComfyUI-Kimodo). Required keys: parents: (NJ,) int32, -1 = root bind_global_m: (NJ, 8) f32 — [t.xyz | q.xyzw | scale], meters lbs_compact_joints: (V, 8) uint16 — pre-compacted skin influences lbs_compact_weights: (V, 8) f32 lbs_compact_max_inf: int — actual max influences (≤ 8) rest_verts_m: (V, 3) f32 faces: (F, 3) uint32 Optional: per_frame_y_down: bool — set False if pred_joint_coords are already rig-native Y-up (kimodo). 
Default True (MHR). openpose18_joint_indices: (18, 2) int32 — body OpenPose-18 → joint index pair, resolved against per-frame `pred_joint_coords`. Each row is (joint_a, joint_b); b == -1 = single joint, else default midpoint of the two (lets producers approximate keypoints with no matching joint, e.g. Nose ≈ midpoint(LeftEye, RightEye)). Enables `SAM3DBody_ToGLB(mode="openpose")` on external rigs. openpose18_joint_weights: (18,) f32 — optional per-keypoint blend weight for the (a, b) mapping above. Position = w*joints[a] + (1-w)*joints[b] when b ≥ 0 (default w=0.5 → midpoint). Values outside [0, 1] EXTRAPOLATE past the line segment — used to approximate landmarks with no nearby joint pair (e.g. ears: w=2.0 along the eye→eye axis puts each ear one eye-distance outside the corresponding eye). Ignored for single-joint rows (b = -1). openpose_hand21_r_joint_indices: (21, 2) int32 — right-hand OpenPose-21 (wrist + 5 fingers × 4 joints, base→tip) → joint index pair. Required (alongside the L counterpart) for openpose mode with include_hands=True. openpose_hand21_l_joint_indices: (21, 2) int32 — left-hand counterpart. openpose_hand21_r_joint_weights: (21,) f32 — optional, same semantics as `openpose18_joint_weights`. openpose_hand21_l_joint_weights: (21,) f32 — optional, same as above. """ if pose_data is None: return None return pose_data.get("_skeleton_override") def extract_rig_static(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> Dict[str, np.ndarray]: """Static rig buffers as numpy. If `pose_data` carries `_skeleton_override`, use that instead of MHR-specific `model.head_pose.mhr` buffers.""" override = _get_skeleton_override(pose_data) if override is not None: # External rig: caller pre-compacts skin and supplies bind global directly, # so we don't need MHR's PCA pose / expression bases. 
parents = np.asarray(override["parents"], dtype=np.int32) rest_v = np.asarray(override["rest_verts_m"], dtype=np.float32) return { "parents": parents, "parents_pmi": parents, "lbs_compact_joints": np.asarray(override["lbs_compact_joints"], dtype=np.uint16), "lbs_compact_weights": np.asarray(override["lbs_compact_weights"], dtype=np.float32), "lbs_compact_max_inf": int(override.get("lbs_compact_max_inf", 4)), "faces": np.asarray(override["faces"], dtype=np.uint32), "num_joints": int(parents.shape[0]), "num_verts": int(rest_v.shape[0]), "num_expr": 0, "num_shape": 0, "_external": True, } inner = model.model if hasattr(model, "model") else model rig = inner.head_pose.mhr head = inner.head_pose def _np(t: torch.Tensor) -> np.ndarray: return t.cpu().numpy() # `skel_joint_parents` encodes the anatomical hierarchy; pmi-derived order # is BFS-optimized for parallel FK and may include traversal quirks. explicit_parents = _np(rig.skel_joint_parents).astype(np.int32) return { "parents": explicit_parents, # (127,) int32, -1 = root "parents_pmi": _parents_from_pmi(rig), # kept for FK-related uses "joint_translation_offsets": _np(rig.skel_joint_translation_offsets), # (127, 3) cm "joint_prerotations": _np(rig.skel_joint_prerotations), # (127, 4) xyzw "param_transform": _np(rig.param_transform), # (889, 249) "lbs_inverse_bind_pose": _np(rig.lbs_inverse_bind_pose), # (127, 8) "lbs_skin_weights": _np(rig.lbs_skin_weights), # (NNZ,) "lbs_skin_indices": _np(rig.lbs_skin_indices).astype(np.int64), # (NNZ,) "lbs_vert_indices": _np(rig.lbs_vert_indices).astype(np.int64), # (NNZ,) "expr_basis": _np(rig.expr_basis), # (72, 18439, 3) "faces": _np(head.faces).astype(np.uint32), # (36874, 3) "num_joints": int(rig.NUM_JOINTS), "num_verts": int(rig.NUM_VERTS), "num_expr": int(rig.NUM_EXPR), "num_shape": int(rig.NUM_IDENTITY), "_external": False, } def compact_skin_to_n( skin_indices: np.ndarray, vert_indices: np.ndarray, weights: np.ndarray, num_verts: int, max_inf: int = 8, ) -> 
def compact_skin_to_n(
    skin_indices: np.ndarray,
    vert_indices: np.ndarray,
    weights: np.ndarray,
    num_verts: int,
    max_inf: int = 8,
) -> Tuple[np.ndarray, np.ndarray, int]:
    """Sparse (joint, vert, weight) triplets -> dense per-vertex skin tables.

    Returns `(joints[V, max_inf] uint16, weights[V, max_inf] float32,
    actual_max)`. For each vertex the `max_inf` largest weights are kept in
    descending order and renormalized to sum to 1. A vertex whose kept weights
    don't sum to a positive value gets weight 1.0 in slot 0. `actual_max` is
    the largest per-vertex influence count after capping at `max_inf`; it lets
    the caller skip JOINTS_1/WEIGHTS_1 when nothing exceeds 4 influences.
    """
    joints = np.zeros((num_verts, max_inf), dtype=np.uint16)
    out_w = np.zeros((num_verts, max_inf), dtype=np.float32)
    counts = np.zeros(num_verts, dtype=np.int32)

    if vert_indices.size:
        # lexsort takes the secondary key first: rows end up grouped by vertex
        # with weights descending inside each group.
        order = np.lexsort((-weights, vert_indices))
        verts_s = vert_indices[order]
        joints_s = skin_indices[order]
        weights_s = weights[order]

        # Rank of each row inside its vertex group = row index minus the row
        # index where that group begins (carried forward by a running max).
        row = np.arange(verts_s.size, dtype=np.int64)
        begins_group = np.concatenate(([True], verts_s[1:] != verts_s[:-1]))
        group_first_row = np.maximum.accumulate(np.where(begins_group, row, 0))
        rank = row - group_first_row

        kept = rank < max_inf
        kept_verts = verts_s[kept]
        kept_rank = rank[kept]
        joints[kept_verts, kept_rank] = joints_s[kept].astype(np.uint16, copy=False)
        out_w[kept_verts, kept_rank] = weights_s[kept].astype(np.float32, copy=False)

        counts[:] = np.minimum(np.bincount(verts_s, minlength=num_verts), max_inf)

    totals = out_w.sum(axis=1, keepdims=True)
    has_weight = totals.squeeze(-1) > 0
    out_w[has_weight] = out_w[has_weight] / totals[has_weight]
    unweighted = ~has_weight
    if unweighted.any():
        out_w[unweighted, 0] = 1.0

    actual_max = int(counts.max()) if counts.size else 0
    return joints, out_w, actual_max
def zero_pose_rest_verts(
    model: Any,
    shape_params: np.ndarray,
    expr_zero: bool = True,
    pose_data: Optional[Dict[str, Any]] = None,
) -> np.ndarray:
    """Rig with zero pose + this subject's shape -> rest verts (V, 3).

    Output is in rig-native Y-up meters. The external-skeleton path returns
    the override's `rest_verts_m` directly (no PCA shape space to expand).
    `expr_zero` is accepted but not read here; expression coefficients are
    always zero.
    """
    override = _get_skeleton_override(pose_data)
    if override is not None:
        return np.asarray(override["rest_verts_m"], dtype=np.float32)

    inner = model.model if hasattr(model, "model") else model
    rig = inner.head_pose.mhr
    if hasattr(rig, "scale_mean"):
        device = rig.scale_mean.device
    else:
        device = next(rig.parameters()).device
    dtype = next(rig.parameters()).dtype

    shape = torch.from_numpy(np.ascontiguousarray(shape_params, dtype=np.float32)).to(device)
    if shape.ndim == 1:
        shape = shape.unsqueeze(0)

    # mhr.forward(identity_coeffs, model_parameters, expr_coeffs):
    #   identity_rest = base_shape + identity_basis @ shape;
    #   cat([model_params, zeros]) through param_transform; expr added.
    zero_pose = torch.zeros(1, 204, device=device, dtype=dtype)
    zero_expr = torch.zeros(1, 72, device=device, dtype=dtype)
    verts, _ = rig(shape.to(dtype), zero_pose, zero_expr, apply_correctives=False)

    # Rig outputs cm; mhr_head divides by 100 for meters. Match that.
    verts_m = verts[0].cpu().float().numpy() / 100.0
    return verts_m.astype(np.float32)


def global_skel_state_per_frame(
    model: Any,
    mhr_model_params: np.ndarray,
) -> np.ndarray:
    """Rig FK over a batch of mhr_model_params -> (N, NJ, 8) skel states.

    Each state is (t cm, q xyzw, scale). Bones are shape- and
    expression-independent, so zero identity and expression coefficients are
    passed. The result is float32 numpy.
    """
    inner = model.model if hasattr(model, "model") else model
    rig = inner.head_pose.mhr
    reference = next(rig.parameters())
    device, dtype = reference.device, reference.dtype

    batch = mhr_model_params.shape[0]
    params = torch.from_numpy(
        np.ascontiguousarray(mhr_model_params, dtype=np.float32)
    ).to(device=device, dtype=dtype)
    zero_shape = torch.zeros(batch, rig.NUM_IDENTITY, device=device, dtype=dtype)
    zero_expr = torch.zeros(batch, rig.NUM_EXPR, device=device, dtype=dtype)

    _, skel_state = rig(zero_shape, params, zero_expr, apply_correctives=False)
    return skel_state.cpu().float().numpy()  # (N, NJ, 8) cm
def rotmat_to_quat_np(R: np.ndarray) -> np.ndarray:
    """Rotation matrices (..., 3, 3) -> xyzw quaternions (..., 4), float32.

    Shepperd 1978 branched method: the branch is picked from the trace and the
    largest diagonal entry so the divisor stays well away from zero. The math
    runs in float64. Cross-frame sign-fixing is the caller's job.
    """
    lead_shape = R.shape[:-2]
    mats = R.reshape(-1, 3, 3).astype(np.float64)
    quats = np.zeros((mats.shape[0], 4), dtype=np.float64)

    d0, d1, d2 = mats[:, 0, 0], mats[:, 1, 1], mats[:, 2, 2]
    trace = d0 + d1 + d2

    # Mutually exclusive branch masks, tested in priority order.
    w_dominant = trace > 0
    x_dominant = ~w_dominant & (d0 > d1) & (d0 > d2)
    y_dominant = ~w_dominant & ~x_dominant & (d1 > d2)
    z_dominant = ~w_dominant & ~x_dominant & ~y_dominant

    if w_dominant.any():
        m = mats[w_dominant]
        scale = np.sqrt(trace[w_dominant] + 1.0) * 2.0
        quats[w_dominant, 0] = (m[:, 2, 1] - m[:, 1, 2]) / scale
        quats[w_dominant, 1] = (m[:, 0, 2] - m[:, 2, 0]) / scale
        quats[w_dominant, 2] = (m[:, 1, 0] - m[:, 0, 1]) / scale
        quats[w_dominant, 3] = 0.25 * scale

    if x_dominant.any():
        m = mats[x_dominant]
        scale = np.sqrt(1.0 + m[:, 0, 0] - m[:, 1, 1] - m[:, 2, 2]) * 2.0
        quats[x_dominant, 0] = 0.25 * scale
        quats[x_dominant, 1] = (m[:, 0, 1] + m[:, 1, 0]) / scale
        quats[x_dominant, 2] = (m[:, 0, 2] + m[:, 2, 0]) / scale
        quats[x_dominant, 3] = (m[:, 2, 1] - m[:, 1, 2]) / scale

    if y_dominant.any():
        m = mats[y_dominant]
        scale = np.sqrt(1.0 + m[:, 1, 1] - m[:, 0, 0] - m[:, 2, 2]) * 2.0
        quats[y_dominant, 0] = (m[:, 0, 1] + m[:, 1, 0]) / scale
        quats[y_dominant, 1] = 0.25 * scale
        quats[y_dominant, 2] = (m[:, 1, 2] + m[:, 2, 1]) / scale
        quats[y_dominant, 3] = (m[:, 0, 2] - m[:, 2, 0]) / scale

    if z_dominant.any():
        m = mats[z_dominant]
        scale = np.sqrt(1.0 + m[:, 2, 2] - m[:, 0, 0] - m[:, 1, 1]) * 2.0
        quats[z_dominant, 0] = (m[:, 0, 2] + m[:, 2, 0]) / scale
        quats[z_dominant, 1] = (m[:, 1, 2] + m[:, 2, 1]) / scale
        quats[z_dominant, 2] = 0.25 * scale
        quats[z_dominant, 3] = (m[:, 1, 0] - m[:, 0, 1]) / scale

    return quats.reshape(lead_shape + (4,)).astype(np.float32)


def global_skel_state_from_pose_data(
    pose_data: Dict[str, Any],
    frame_indices: List[int],
    person_k: int,
    NJ: int,
    *,
    joint_coords_y_down: bool = True,
) -> np.ndarray:
    """Per-frame skel_state from stored pose_data, bypassing rig.forward.

    Reads `pred_global_rots` and `pred_joint_coords` (first `NJ` joints) of
    person `person_k` in each listed frame. Returns (N, NJ, 8) float32 in
    METERS, MHR-native frame.

    pred_global_rots are MHR-native (no y/z flip). For MHR, pred_joint_coords
    are stored y-down (post-flip), so Y and Z are negated when
    `joint_coords_y_down=True`. External skeletons (Kimodo) store y-up
    already → pass False. Scale is fixed at 1 (rig scale isn't preserved in
    pose_data; close to 1 for typical body poses).
    """
    frames = pose_data["frames"]
    n_frames = len(frame_indices)
    rotmat = np.zeros((n_frames, NJ, 3, 3), dtype=np.float32)
    coords = np.zeros((n_frames, NJ, 3), dtype=np.float32)
    for row, frame_idx in enumerate(frame_indices):
        person = frames[frame_idx][person_k]
        rotmat[row] = np.asarray(person["pred_global_rots"], dtype=np.float32)[:NJ]
        coords[row] = np.asarray(person["pred_joint_coords"], dtype=np.float32)[:NJ]

    if joint_coords_y_down:
        coords[..., 1:3] *= -1.0

    unit_scale = np.ones((n_frames, NJ, 1), dtype=np.float32)
    return np.concatenate([coords, rotmat_to_quat_np(rotmat), unit_scale], axis=-1)


def bind_skel_state(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> np.ndarray:
    """Bind-pose global skel state (NJ, 8) in cm.

    MHR path: rig FK with all-zero params. This is the inverse of
    `lbs_inverse_bind_pose` modulo precision, and is used as the bones' static
    TRS so the rest mesh looks correct with no animation playing. External
    rig: the override's `bind_global_m` is copied and its translation
    converted m → cm to match this contract.
    """
    override = _get_skeleton_override(pose_data)
    if override is None:
        zero_params = np.zeros((1, 204), dtype=np.float32)
        return global_skel_state_per_frame(model, zero_params)[0]

    bind = np.asarray(override["bind_global_m"], dtype=np.float32).copy()
    bind[:, :3] *= 100.0
    return bind
def _trs_to_mat4_np(t: np.ndarray, q: np.ndarray, s: np.ndarray) -> np.ndarray:
    """Row-major float32 (NJ, 4, 4) for M = T * R * S (uniform scale):
    [s*R | t; 0 | 1]."""
    mats = np.zeros((t.shape[0], 4, 4), dtype=np.float32)
    mats[:, :3, :3] = _quat_to_mat3_np(q) * s[:, None, None]
    mats[:, :3, 3] = t
    mats[:, 3, 3] = 1.0
    return mats


def ibp_from_bind_global(bind_skel_state_m: np.ndarray) -> np.ndarray:
    """Inverse-bind MAT4 by inverting the rig's bind global (meters).

    Guarantees IBP[j] = inverse(FK over bind local TRS) — exactly what glTF
    skinning needs given bones default to the bind local TRS. Returns
    (NJ, 4, 4) column-major float32.
    """
    state = bind_skel_state_m
    forward = _trs_to_mat4_np(
        state[:, :3].astype(np.float32),
        state[:, 3:7].astype(np.float32),
        state[:, 7].astype(np.float32),
    )
    # fp64 4x4 invert per joint for stability, back to fp32.
    inverse = np.linalg.inv(forward.astype(np.float64)).astype(np.float32)
    # glTF MAT4 accessor is column-major.
    return inverse.transpose(0, 2, 1).astype(np.float32)


def _local_trs_per_frame(
    rig_static: Dict[str, np.ndarray],
    mhr_model_params: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Per-frame local TRS in rig-native frame, meters.

    Returns (local_t[N, 127, 3], local_q[N, 127, 4 xyzw], local_s[N, 127]),
    all float32. Mirrors mhr_rig.forward without skinning.
    """
    param_transform = rig_static["param_transform"]         # (889, 249) = (127*7, 204+45)
    offsets_cm = rig_static["joint_translation_offsets"]    # (127, 3) cm
    prerotations = rig_static["joint_prerotations"]         # (127, 4)
    num_joints = rig_static["num_joints"]
    num_frames = mhr_model_params.shape[0]

    # Zero-pad the model params up to the transform's input width.
    padded = np.zeros((num_frames, param_transform.shape[1]), dtype=np.float32)
    padded[:, :mhr_model_params.shape[1]] = mhr_model_params.astype(np.float32)

    # joint_parameters[n, d] = sum_i pt[d, i] * cat_in[n, i]
    joint_params = (padded @ param_transform.T).reshape(num_frames, num_joints, 7)

    local_t_cm = joint_params[..., :3] + offsets_cm[None]
    euler_q = _euler_xyz_to_quat_np(joint_params[..., 3:6])
    local_q = _quat_multiply_np(prerotations[None], euler_q)
    local_s = np.exp(joint_params[..., 6] * _LN2)  # 2 ** joint_params[..., 6]

    # rig-cm -> glTF-meters
    return (
        (local_t_cm * 0.01).astype(np.float32),
        local_q.astype(np.float32),
        local_s.astype(np.float32),
    )


def _ibp_to_mat4(ibp_skel: np.ndarray) -> np.ndarray:
    """(127, 8) IBP skel-state -> (127, 4, 4) column-major MAT4, t in meters."""
    translation_m = ibp_skel[:, :3] * 0.01  # cm -> m
    mats = _trs_to_mat4_np(translation_m, ibp_skel[:, 3:7], ibp_skel[:, 7])
    return mats.transpose(0, 2, 1).astype(np.float32)


def uv_sphere_unit(n_lat: int = 9, n_lon: int = 16) -> Tuple[np.ndarray, np.ndarray]:
    """Unit UV sphere with poles on ±Y.

    Returns (verts float32 (V, 3), faces uint32 (F, 3)): vertex 0 is the
    south pole, then `n_lat` rings of `n_lon` verts from south to north, then
    the north pole. `n_lat` is ODD by default so one ring lands at the
    equator. Default (9, 16) gives 146 verts / 288 faces — n_lon matches the
    16-segment cylinder used by capsule limbs AND the equator ring aligns
    1-to-1 with the cylinder end ring, so silhouettes meet flush.
    """
    longitudes = [2.0 * np.pi * k / n_lon for k in range(n_lon)]

    verts: List[List[float]] = [[0.0, -1.0, 0.0]]  # south pole at index 0
    for i in range(1, n_lat + 1):
        lat = -0.5 * np.pi + np.pi * i / (n_lat + 1)
        y = float(np.sin(lat))
        r = float(np.cos(lat))
        verts.extend(
            [r * float(np.cos(phi)), y, r * float(np.sin(phi))] for phi in longitudes
        )
    north_idx = len(verts)
    verts.append([0.0, 1.0, 0.0])

    def ring_start(ring: int) -> int:
        return 1 + ring * n_lon

    # South cap — winding gives -Y outward normal.
    faces: List[List[int]] = [
        [0, 1 + k, 1 + (k + 1) % n_lon] for k in range(n_lon)
    ]
    # Inter-ring quads, outward radial; two triangles each.
    for i in range(n_lat - 1):
        low, high = ring_start(i), ring_start(i + 1)
        for k in range(n_lon):
            k_next = (k + 1) % n_lon
            a, b = low + k, low + k_next
            c, d = high + k_next, high + k
            faces.append([a, c, b])
            faces.append([a, d, c])
    # North cap — winding gives +Y outward normal.
    top = ring_start(n_lat - 1)
    faces.extend(
        [north_idx, top + (k + 1) % n_lon, top + k] for k in range(n_lon)
    )

    return (np.asarray(verts, dtype=np.float32),
            np.asarray(faces, dtype=np.uint32))
def flat_shade_mesh(
    verts: np.ndarray,
    faces: np.ndarray,
    joints: np.ndarray,
    weights: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Smooth -> flat shading by duplicating verts per face.

    Each triangle gets 3 unique verts sharing its face normal; skinning attrs
    are duplicated alongside. Returns (verts[3F, 3] f32, normals[3F, 3] f32,
    faces[F, 3] u32, joints[3F, K] u16, weights[3F, K] f32), where K is the
    influence width of the inputs (any width, not only 4). Degenerate faces
    (normal length <= 1e-8) get a +Y normal.
    """
    num_faces = faces.shape[0]
    # Row 3*f + k of every output corresponds to corner k of face f.
    corner_idx = faces.reshape(-1)

    v0 = verts[faces[:, 0]]
    v1 = verts[faces[:, 1]]
    v2 = verts[faces[:, 2]]
    fn = np.cross(v1 - v0, v2 - v0)
    fn_len = np.linalg.norm(fn, axis=1, keepdims=True)
    fn = np.where(fn_len > 1e-8, fn / np.maximum(fn_len, 1e-12),
                  np.array([[0.0, 1.0, 0.0]]))

    new_v = verts[corner_idx].astype(np.float32)
    new_n = np.repeat(fn, 3, axis=0).astype(np.float32)
    new_f = np.arange(num_faces * 3, dtype=np.uint32).reshape(num_faces, 3)
    new_j = joints[corner_idx].astype(np.uint16)
    new_w = weights[corner_idx].astype(np.float32)
    return new_v, new_n, new_f, new_j, new_w


def smooth_shade_mesh(
    verts: np.ndarray,
    faces: np.ndarray,
    joints: np.ndarray,
    weights: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Area-weighted per-vertex normals (smooth shading).

    Geometry, skinning and indexing pass through unchanged (apart from the
    float32 / uint32 casts on verts and faces) so vertex colors stay aligned.
    `joints` and `weights` are returned as the same objects. Orphan verts get
    a +Y fallback normal.
    """
    num_verts = int(verts.shape[0])
    v0 = verts[faces[:, 0]]
    v1 = verts[faces[:, 1]]
    v2 = verts[faces[:, 2]]
    fn = np.cross(v1 - v0, v2 - v0).astype(np.float32)

    vn = np.zeros((num_verts, 3), dtype=np.float32)
    for corner in range(3):
        # add.at accumulates correctly when a vertex index repeats.
        np.add.at(vn, faces[:, corner], fn)
    ln = np.linalg.norm(vn, axis=1, keepdims=True)
    vn = np.where(ln > 1e-8, vn / np.maximum(ln, 1e-12),
                  np.array([[0.0, 1.0, 0.0]], dtype=np.float32))
    return (
        verts.astype(np.float32),
        vn.astype(np.float32),
        faces.astype(np.uint32),
        joints,
        weights,
    )


def rotation_align(from_vec: np.ndarray, to_vec: np.ndarray) -> np.ndarray:
    """3x3 float32 rotation mapping unit `from_vec` onto unit `to_vec`.

    General case: Rodrigues' formula about from_vec x to_vec. Parallel
    vectors give the identity. Anti-parallel vectors give a 180° turn about an
    axis perpendicular to `from_vec`. That axis is derived from X, or from Y
    when `from_vec` lies close to X, so the ±Y case still yields
    diag(1, -1, -1). Previously the fixed X-axis turn was returned for every
    anti-parallel pair, which left vectors along X unrotated.
    """
    cos_t = float(np.dot(from_vec, to_vec))
    cross = np.cross(from_vec, to_vec)
    sin_t = float(np.linalg.norm(cross))

    if sin_t < 1e-8:
        if cos_t > 0:
            return np.eye(3, dtype=np.float32)
        src = np.asarray(from_vec, dtype=np.float64)
        src_len = float(np.linalg.norm(src))
        if src_len < 1e-12:
            # Degenerate zero-length input: keep the historical fallback.
            return np.diag([1.0, -1.0, -1.0]).astype(np.float32)
        src = src / src_len
        seed = np.array([1.0, 0.0, 0.0]) if abs(src[0]) < 0.9 else np.array([0.0, 1.0, 0.0])
        # Gram-Schmidt: strip the component of `seed` along `src`.
        perp = seed - np.dot(seed, src) * src
        perp = perp / np.linalg.norm(perp)
        # A rotation by pi about unit axis a is 2 * a a^T - I.
        return (2.0 * np.outer(perp, perp) - np.eye(3)).astype(np.float32)

    axis = cross / sin_t
    K = np.array([
        [0.0, -axis[2], axis[1]],
        [axis[2], 0.0, -axis[0]],
        [-axis[1], axis[0], 0.0],
    ], dtype=np.float32)
    return (np.eye(3, dtype=np.float32) + sin_t * K + (1.0 - cos_t) * (K @ K)).astype(np.float32)


def make_lit_material(
    roughness: float = 0.85,
    double_sided: bool = False,
    opacity: float = 1.0,
) -> dict:
    """Lit PBR material using vertex COLOR_0 multiplicatively.

    KHR_materials_unlit is intentionally off so viewer lighting reveals
    surface form. metallic=0 keeps the surface dielectric so vertex colors
    stay readable. roughness=0.85 suits dense rainbow body meshes; 0.3 matches
    SCAIL-Pose's glossy rig look. opacity < 1 switches to alpha-blend (e.g.
    see-through body mesh over bones). `roughness` and `opacity` are clamped
    to [0, 1].
    """
    a = float(max(0.0, min(1.0, opacity)))
    mat = {
        "pbrMetallicRoughness": {
            "baseColorFactor": [1.0, 1.0, 1.0, a],
            "metallicFactor": 0.0,
            "roughnessFactor": float(max(0.0, min(1.0, roughness))),
        },
    }
    if a < 1.0:
        mat["alphaMode"] = "BLEND"
    if double_sided:
        mat["doubleSided"] = True
    return mat


# OpenPose 18-keypoint viz (independent of MHR rig — uses pred_keypoints_3d,
# the model's regressed surface keypoints).
OPENPOSE_18_NAMES = (
    "Nose", "Neck",
    "RShoulder", "RElbow", "RWrist",
    "LShoulder", "LElbow", "LWrist",
    "RHip", "RKnee", "RAnkle",
    "LHip", "LKnee", "LAnkle",
    "REye", "LEye", "REar", "LEar",
)

# COCO-18 OpenPose -> MHR70. Subset of `MHR70_TO_OPENPOSE` in
# comfy/ldm/sam3d/mhr70.py (no toes/heels).
OPENPOSE18_TO_MHR70 = np.array([
    0,   # 0  Nose
    69,  # 1  Neck
    6,   # 2  RShoulder
    8,   # 3  RElbow
    41,  # 4  RWrist
    5,   # 5  LShoulder
    7,   # 6  LElbow
    62,  # 7  LWrist
    10,  # 8  RHip
    12,  # 9  RKnee
    14,  # 10 RAnkle
    9,   # 11 LHip
    11,  # 12 LKnee
    13,  # 13 LAnkle
    2,   # 14 REye
    1,   # 15 LEye
    4,   # 16 REar
    3,   # 17 LEar
], dtype=np.int64)
# `body_limbSeq` is 1-indexed there; we use 0-indexed throughout this module.
from comfy_extras.pose.keypoint_draw import KeypointDraw as _KeypointDraw
_KD = _KeypointDraw()
OPENPOSE_18_PAIRS = tuple((a - 1, b - 1) for a, b in _KD.body_limbSeq)
OPENPOSE_RAINBOW_18 = (np.array(_KD.colors, dtype=np.float32) / 255.0)

# SCAIL-Pose limb palette (17 limbs in `OPENPOSE_18_PAIRS` order): warm =
# right side, cool = left, grey centerline, pink/violet face. Matches
# ComfyUI-SCAIL-Pose's `nlf_render.py::ordered_colors_255`.
SCAIL_LIMB_COLORS_17 = (np.array([
    [255,   0,   0],   # 0  Neck → R.Shoulder      (Red)
    [  0, 255, 255],   # 1  Neck → L.Shoulder      (Cyan)
    [255,  85,   0],   # 2  R.Shoulder → R.Elbow   (Orange)
    [255, 170,   0],   # 3  R.Elbow → R.Wrist      (Golden Orange)
    [  0, 170, 255],   # 4  L.Shoulder → L.Elbow   (Sky Blue)
    [  0,  85, 255],   # 5  L.Elbow → L.Wrist      (Medium Blue)
    [180, 255,   0],   # 6  Neck → R.Hip           (Yellow-Green)
    [  0, 255,   0],   # 7  R.Hip → R.Knee         (Bright Green)
    [  0, 255,  85],   # 8  R.Knee → R.Ankle       (Light Green-Blue)
    [  0,   0, 255],   # 9  Neck → L.Hip           (Pure Blue)
    [ 85,   0, 255],   # 10 L.Hip → L.Knee         (Purple-Blue)
    [170,   0, 255],   # 11 L.Knee → L.Ankle       (Medium Purple)
    [150, 150, 150],   # 12 Neck → Nose            (Grey)
    [255,   0, 170],   # 13 Nose → R.Eye           (Pink-Magenta)
    [ 50,   0, 255],   # 14 R.Eye → R.Ear          (Dark Violet)
    [255,   0, 170],   # 15 Nose → L.Eye           (Pink-Magenta)
    [ 50,   0, 255],   # 16 L.Eye → L.Ear          (Dark Violet)
], dtype=np.float32) / 255.0)


def _scail_keypoint_colors_18(
    limb_pairs: Optional[Tuple[Tuple[int, int], ...]] = None,
) -> np.ndarray:
    """18 keypoint colors derived from 17 SCAIL limb colors.

    Each kp inherits the color of the first limb where it's the distal
    (second) endpoint; keypoints that are never a distal endpoint stay
    mid-grey (0.6, 0.6, 0.6).

    Args:
        limb_pairs: 0-indexed `(proximal, distal)` keypoint pairs, in the same
            order as `SCAIL_LIMB_COLORS_17`. `None` uses `OPENPOSE_18_PAIRS`.

    Returns:
        float32 `(18, 3)` RGB array in [0, 1].
    """
    pairs = OPENPOSE_18_PAIRS if limb_pairs is None else limb_pairs
    out = np.tile(np.array([0.6, 0.6, 0.6], dtype=np.float32), (18, 1))
    # Track assignment explicitly rather than float-comparing against the
    # grey placeholder, so the "first limb wins" rule does not depend on the
    # palette never containing that exact grey.
    assigned = np.zeros(18, dtype=bool)
    for limb_i, (_, b) in enumerate(pairs):
        if assigned[b]:
            continue
        out[b] = SCAIL_LIMB_COLORS_17[limb_i]
        assigned[b] = True
    return out


SCAIL_KEYPOINT_COLORS_18 = _scail_keypoint_colors_18()

# OpenPose hand: 21 kp per hand = wrist + 5 fingers × 4 joints (proximal→distal).
# MHR70 stores fingers as (tip, joint1, joint2, joint3=MCP) so we reverse each
# 4-tuple. See comfy/ldm/sam3d/mhr70.py.
OPENPOSE_HAND21_NAMES = (
    "wrist",
    "thumb1", "thumb2", "thumb3", "thumb4",
    "index1", "index2", "index3", "index4",
    "middle1", "middle2", "middle3", "middle4",
    "ring1", "ring2", "ring3", "ring4",
    "pinky1", "pinky2", "pinky3", "pinky4",
)

OPENPOSE_HAND21_TO_MHR70_R = np.array([
    41,              # 0 right_wrist
    24, 23, 22, 21,  # thumb base→tip
    28, 27, 26, 25,  # index
    32, 31, 30, 29,  # middle
    36, 35, 34, 33,  # ring
    40, 39, 38, 37,  # pinky
], dtype=np.int64)

OPENPOSE_HAND21_TO_MHR70_L = np.array([
    62,              # 0 left_wrist
    45, 44, 43, 42,  # thumb base→tip
    49, 48, 47, 46,  # index
    53, 52, 51, 50,  # middle
    57, 56, 55, 54,  # ring
    61, 60, 59, 58,  # pinky
], dtype=np.int64)

# OpenPose hand limbs: 5 chains × 4 bones, delegated to KeypointDraw.hand_edges.
OPENPOSE_HAND_PAIRS = tuple(tuple(e) for e in _KD.hand_edges)

# OpenPose hand colors (poseParameters.cpp::HAND_COLORS_RENDER): wrist grey,
# then per-finger base→tip gradient red/yellow/green/cyan/magenta.
OPENPOSE_HAND_COLORS_21 = (np.array([
    [100, 100, 100],
    [100,   0,   0], [150,   0,   0], [200,   0,   0], [255,   0,   0],
    [100, 100,   0], [150, 150,   0], [200, 200,   0], [255, 255,   0],
    [  0, 100,  50], [  0, 150,  75], [  0, 200, 100], [  0, 255, 125],
    [  0, 100, 100], [  0, 150, 150], [  0, 200, 200], [  0, 255, 255],
    [100,   0, 100], [150,   0, 150], [200,   0, 200], [255,   0, 255],
], dtype=np.float32) / 255.0)

# DWPose: solid blue hand dots, rainbow per-finger bones (matches
# controlnet_aux/dwpose/util.py::draw_handpose).
DWPOSE_HAND_COLORS_21 = np.tile(
    np.array([[0.0, 0.0, 1.0]], dtype=np.float32), (21, 1)
)

# Face landmarks from the MHR rig (option `face_source="rig"`).
# MHR has no face bones — face deforms via expr_params morphs — so landmarks
# are sourced from `pred_vertices` at fixed vertex IDs picked by NN against
# anatomically-plausible target xyz in canonical Y-up. Iterate visually in
# Blender and tweak targets if landmarks land off-surface.

# (name, target_xyz) in MHR canonical Y-up meters.
FACE_LANDMARK_TARGETS: Tuple[Tuple[str, Tuple[float, float, float]], ...] = (
    # Brows — 3 per side, outer→inner
    ("r_brow_outer", (-0.058, 1.690, 0.090)),
    ("r_brow_mid", (-0.040, 1.695, 0.105)),
    ("r_brow_inner", (-0.020, 1.692, 0.115)),
    ("l_brow_inner", (+0.020, 1.692, 0.115)),
    ("l_brow_mid", (+0.040, 1.695, 0.105)),
    ("l_brow_outer", (+0.058, 1.690, 0.090)),
    # Right eye — outer/top/inner/bottom
    ("r_eye_outer", (-0.058, 1.660, 0.085)),
    ("r_eye_top", (-0.040, 1.673, 0.090)),
    ("r_eye_inner", (-0.022, 1.665, 0.092)),
    ("r_eye_bot", (-0.040, 1.652, 0.090)),
    # Left eye
    ("l_eye_outer", (+0.058, 1.660, 0.085)),
    ("l_eye_top", (+0.040, 1.673, 0.090)),
    ("l_eye_inner", (+0.022, 1.665, 0.092)),
    ("l_eye_bot", (+0.040, 1.652, 0.090)),
    # Nose
    ("nose_bridge", (0.000, 1.660, 0.110)),
    ("nose_mid", (0.000, 1.620, 0.125)),
    ("nose_tip", (0.000, 1.585, 0.135)),
    ("nostril_r", (-0.014, 1.580, 0.115)),
    ("nostril_l", (+0.014, 1.580, 0.115)),
    # Mouth — 4 outer-lip points
    ("mouth_r_corner", (-0.030, 1.540, 0.105)),
    ("upper_lip_mid", (+0.000, 1.555, 0.115)),
    ("mouth_l_corner", (+0.030, 1.540, 0.105)),
    ("lower_lip_mid", (+0.000, 1.530, 0.110)),
    # Chin + jaw line — Y raised so NN search lands on chin tip / jaw underside
    # (above the jaw-neck boundary at y~1.47) instead of throat verts.
    ("chin", (0.000, 1.498, 0.108)),
    ("r_jaw_low", (-0.038, 1.512, 0.100)),
    ("r_jaw_mid", (-0.062, 1.535, 0.080)),
    ("r_jaw_high", (-0.078, 1.562, 0.060)),
    ("l_jaw_low", (+0.038, 1.512, 0.100)),
    ("l_jaw_mid", (+0.062, 1.535, 0.080)),
    ("l_jaw_high", (+0.078, 1.562, 0.060)),
)


# Solid white face landmarks — matches DWPose, reads cleanly against the
# rainbow body palette.
def _face_landmark_colors() -> np.ndarray:
    """Return one white float32 RGB row per entry of `FACE_LANDMARK_TARGETS`."""
    return np.ones((len(FACE_LANDMARK_TARGETS), 3), dtype=np.float32)


FACE_LANDMARK_COLORS: np.ndarray = _face_landmark_colors()


def select_face_landmark_vert_ids(
    canonical_positions: np.ndarray,
    face_mask: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Nearest-vertex lookup for every `FACE_LANDMARK_TARGETS` entry.

    Candidate verts come from `face_mask` when it is given and has at least
    one truthy entry (verts that deform with any of the 72 expression axes —
    keeps chin/jaw search off the neck). Otherwise a position bbox is used
    (less reliable; throat verts sometimes pull chin targets).

    Args:
        canonical_positions: vertex positions, reshaped here to `(-1, 3)`.
        face_mask: optional per-vertex mask, flattened before use.

    Returns:
        int64 array with one vertex index (into the flattened position rows)
        per landmark, in `FACE_LANDMARK_TARGETS` order.

    Raises:
        ValueError: if the candidate set is empty.
    """
    positions = np.asarray(canonical_positions, dtype=np.float32).reshape(-1, 3)

    mask = None if face_mask is None else np.asarray(face_mask)
    if mask is None or not mask.any():
        # Fallback bbox: above the jaw-neck line, near the midline, in front.
        above_neck = positions[:, 1] > 1.47
        near_midline = np.abs(positions[:, 0]) < 0.11
        in_front = positions[:, 2] > 0.04
        candidates = np.flatnonzero(above_neck & near_midline & in_front)
    else:
        candidates = np.flatnonzero(mask)

    if candidates.size == 0:
        raise ValueError(
            "select_face_landmark_vert_ids: no head verts matched the "
            "canonical filter — check that pose_data.canonical_colors "
            "holds the MHR rest-pose positions / face_mask."
        )

    pool = positions[candidates]
    nearest = []
    for _name, xyz in FACE_LANDMARK_TARGETS:
        offset = pool - np.asarray(xyz, dtype=np.float32)
        sq_dist = np.sum(offset ** 2, axis=1)
        # Map the winner back from pool order to the caller's vertex index.
        nearest.append(int(candidates[int(sq_dist.argmin())]))
    return np.array(nearest, dtype=np.int64)