"""Pure-numpy port of MediaPipe's face_geometry (FACE_LANDMARK_PIPELINE mode) + weighted Procrustes solver. Computes the 4x4 facial transformation matrix. """ from __future__ import annotations import math import numpy as np def _solve_weighted_orthogonal_problem(src: np.ndarray, tgt: np.ndarray, weights: np.ndarray) -> np.ndarray: """Weighted orthogonal Procrustes (similarity). Returns 4x4 M with `target ≈ M @ homogeneous(source)` in the weighted LS sense. fp64 for SVD stability. Port of procrustes_solver.cc.""" sqrt_w = np.sqrt(weights.astype(np.float64)) w_total = float((sqrt_w ** 2).sum()) ws = src.astype(np.float64) * sqrt_w wt = tgt.astype(np.float64) * sqrt_w c_w = (ws @ sqrt_w) / w_total centered = ws - np.outer(c_w, sqrt_w) U, _S, Vt = np.linalg.svd(wt @ centered.T, full_matrices=True) # Disallow reflection: flip the least-significant axis when det(U)·det(V)<0. post, pre = U.copy(), Vt.T.copy() if np.linalg.det(post) * np.linalg.det(pre) < 0: post[:, 2] *= -1.0 R = post @ pre.T denom = float((centered * ws).sum()) if denom < 1e-12: raise ValueError("Procrustes denominator collapsed (degenerate source).") scale = float((R @ centered * wt).sum()) / denom translation = ((wt - scale * (R @ ws)) @ sqrt_w) / w_total M = np.eye(4, dtype=np.float64) M[:3, :3] = scale * R M[:3, 3] = translation return M def _estimate_scale(canonical: np.ndarray, runtime: np.ndarray, weights: np.ndarray) -> float: """scale = ‖first column of M[:3]‖ per geometry_pipeline.cc::EstimateScale.""" return float(np.linalg.norm(_solve_weighted_orthogonal_problem(canonical, runtime, weights)[:3, 0])) def solve_facial_transformation_matrix( landmarks_normalized: np.ndarray, canonical_vertices: np.ndarray, procrustes_indices: np.ndarray, procrustes_weights: np.ndarray, image_width: int, image_height: int, # face_geometry_calculator_options.pbtxt defaults vertical_fov_degrees: float = 63.0, near: float = 1.0, ) -> np.ndarray: """4x4 facial transformation matrix via two-pass scale recovery `landmarks_normalized` is (N, 3) in MediaPipe normalized convention: x, y in [0,1] with TOP-LEFT origin, z in width-scaled units. """ h_near = 2.0 * near * math.tan(0.5 * math.radians(vertical_fov_degrees)) w_near = image_width * h_near / image_height sub = procrustes_indices.astype(np.int64) screen = landmarks_normalized[sub].T.astype(np.float64).copy() canon = canonical_vertices[sub].T.astype(np.float64).copy() weights = procrustes_weights.astype(np.float64) # ProjectXY (TOP_LEFT y-flip, then scale all 3 axes; z uses x-scale). screen[1] = 1.0 - screen[1] screen[0] = screen[0] * w_near - 0.5 * w_near screen[1] = screen[1] * h_near - 0.5 * h_near screen[2] = screen[2] * w_near depth_offset = float(screen[2].mean()) def _unproject(s: np.ndarray, scale: float) -> np.ndarray: s = s.copy() s[2] = (s[2] - depth_offset + near) / scale s[0] *= s[2] / near s[1] *= s[2] / near s[2] *= -1.0 return s first = screen.copy() first[2] *= -1.0 s1 = _estimate_scale(canon, first, weights) # 1st pass: Procrustes on projected XY s2 = _estimate_scale(canon, _unproject(screen, s1), weights) # 2nd pass: rescale z by s1, un-project XY return _solve_weighted_orthogonal_problem(canon, _unproject(screen, s1 * s2), weights).astype(np.float32) def transformation_matrix_from_detection(face_dict: dict, image_width: int, image_height: int, canonical_data: dict) -> np.ndarray: """Adapt a FaceLandmarker face dict to MP's normalized convention and solve. FaceMesh emits (x, y, z) in 192-canonical units; MP's geometry expects z_norm = z_canonical * scale_x / image_width""" lmks_xy, lmks_3d = face_dict["landmarks_xy"], face_dict["landmarks_3d"] aug = np.concatenate([lmks_3d[:, :2].astype(np.float64), np.ones((lmks_xy.shape[0], 1))], axis=1) M, *_ = np.linalg.lstsq(aug, lmks_xy.astype(np.float64), rcond=None) scale_x = float(np.linalg.norm(M[0])) z_scale = scale_x / image_width if scale_x > 1e-6 else 1.0 / image_width normalized = np.empty((lmks_xy.shape[0], 3), dtype=np.float32) normalized[:, 0] = lmks_xy[:, 0] / image_width normalized[:, 1] = lmks_xy[:, 1] / image_height normalized[:, 2] = lmks_3d[:, 2] * z_scale return solve_facial_transformation_matrix( normalized, canonical_data["canonical_vertices"], canonical_data["procrustes_indices"], canonical_data["procrustes_weights"], image_width=image_width, image_height=image_height, )