mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-24 07:57:29 +08:00
* Initial mediapipe face detection support * Update face_geometry.py * Account for diff sized batch input * Model folder placeholder
112 lines
4.6 KiB
Python
112 lines
4.6 KiB
Python
"""Pure-numpy port of MediaPipe's face_geometry (FACE_LANDMARK_PIPELINE mode)
|
|
+ weighted Procrustes solver. Computes the 4x4 facial transformation matrix.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
import numpy as np
|
|
|
|
|
|
def _solve_weighted_orthogonal_problem(src: np.ndarray, tgt: np.ndarray, weights: np.ndarray) -> np.ndarray:
|
|
"""Weighted orthogonal Procrustes (similarity). Returns 4x4 M with
|
|
`target ≈ M @ homogeneous(source)` in the weighted LS sense. fp64 for
|
|
SVD stability. Port of procrustes_solver.cc."""
|
|
sqrt_w = np.sqrt(weights.astype(np.float64))
|
|
w_total = float((sqrt_w ** 2).sum())
|
|
ws = src.astype(np.float64) * sqrt_w
|
|
wt = tgt.astype(np.float64) * sqrt_w
|
|
|
|
c_w = (ws @ sqrt_w) / w_total
|
|
centered = ws - np.outer(c_w, sqrt_w)
|
|
U, _S, Vt = np.linalg.svd(wt @ centered.T, full_matrices=True)
|
|
# Disallow reflection: flip the least-significant axis when det(U)·det(V)<0.
|
|
post, pre = U.copy(), Vt.T.copy()
|
|
if np.linalg.det(post) * np.linalg.det(pre) < 0:
|
|
post[:, 2] *= -1.0
|
|
R = post @ pre.T
|
|
|
|
denom = float((centered * ws).sum())
|
|
if denom < 1e-12:
|
|
raise ValueError("Procrustes denominator collapsed (degenerate source).")
|
|
scale = float((R @ centered * wt).sum()) / denom
|
|
translation = ((wt - scale * (R @ ws)) @ sqrt_w) / w_total
|
|
|
|
M = np.eye(4, dtype=np.float64)
|
|
M[:3, :3] = scale * R
|
|
M[:3, 3] = translation
|
|
return M
|
|
|
|
|
|
def _estimate_scale(canonical: np.ndarray, runtime: np.ndarray, weights: np.ndarray) -> float:
|
|
"""scale = ‖first column of M[:3]‖ per geometry_pipeline.cc::EstimateScale."""
|
|
return float(np.linalg.norm(_solve_weighted_orthogonal_problem(canonical, runtime, weights)[:3, 0]))
|
|
|
|
|
|
def solve_facial_transformation_matrix(
|
|
landmarks_normalized: np.ndarray,
|
|
canonical_vertices: np.ndarray,
|
|
procrustes_indices: np.ndarray,
|
|
procrustes_weights: np.ndarray,
|
|
image_width: int,
|
|
image_height: int,
|
|
# face_geometry_calculator_options.pbtxt defaults
|
|
vertical_fov_degrees: float = 63.0,
|
|
near: float = 1.0,
|
|
) -> np.ndarray:
|
|
"""4x4 facial transformation matrix via two-pass scale recovery
|
|
`landmarks_normalized` is (N, 3) in MediaPipe normalized convention: x, y
|
|
in [0,1] with TOP-LEFT origin, z in width-scaled units.
|
|
"""
|
|
|
|
h_near = 2.0 * near * math.tan(0.5 * math.radians(vertical_fov_degrees))
|
|
w_near = image_width * h_near / image_height
|
|
|
|
sub = procrustes_indices.astype(np.int64)
|
|
screen = landmarks_normalized[sub].T.astype(np.float64).copy()
|
|
canon = canonical_vertices[sub].T.astype(np.float64).copy()
|
|
weights = procrustes_weights.astype(np.float64)
|
|
|
|
# ProjectXY (TOP_LEFT y-flip, then scale all 3 axes; z uses x-scale).
|
|
screen[1] = 1.0 - screen[1]
|
|
screen[0] = screen[0] * w_near - 0.5 * w_near
|
|
screen[1] = screen[1] * h_near - 0.5 * h_near
|
|
screen[2] = screen[2] * w_near
|
|
depth_offset = float(screen[2].mean())
|
|
|
|
def _unproject(s: np.ndarray, scale: float) -> np.ndarray:
|
|
s = s.copy()
|
|
s[2] = (s[2] - depth_offset + near) / scale
|
|
s[0] *= s[2] / near
|
|
s[1] *= s[2] / near
|
|
s[2] *= -1.0
|
|
return s
|
|
|
|
first = screen.copy()
|
|
first[2] *= -1.0
|
|
s1 = _estimate_scale(canon, first, weights) # 1st pass: Procrustes on projected XY
|
|
s2 = _estimate_scale(canon, _unproject(screen, s1), weights) # 2nd pass: rescale z by s1, un-project XY
|
|
return _solve_weighted_orthogonal_problem(canon, _unproject(screen, s1 * s2), weights).astype(np.float32)
|
|
|
|
|
|
def transformation_matrix_from_detection(face_dict: dict, image_width: int, image_height: int, canonical_data: dict) -> np.ndarray:
|
|
"""Adapt a FaceLandmarker face dict to MP's normalized convention and solve.
|
|
FaceMesh emits (x, y, z) in 192-canonical units; MP's geometry expects
|
|
z_norm = z_canonical * scale_x / image_width"""
|
|
|
|
lmks_xy, lmks_3d = face_dict["landmarks_xy"], face_dict["landmarks_3d"]
|
|
aug = np.concatenate([lmks_3d[:, :2].astype(np.float64), np.ones((lmks_xy.shape[0], 1))], axis=1)
|
|
M, *_ = np.linalg.lstsq(aug, lmks_xy.astype(np.float64), rcond=None)
|
|
scale_x = float(np.linalg.norm(M[0]))
|
|
z_scale = scale_x / image_width if scale_x > 1e-6 else 1.0 / image_width
|
|
|
|
normalized = np.empty((lmks_xy.shape[0], 3), dtype=np.float32)
|
|
normalized[:, 0] = lmks_xy[:, 0] / image_width
|
|
normalized[:, 1] = lmks_xy[:, 1] / image_height
|
|
normalized[:, 2] = lmks_3d[:, 2] * z_scale
|
|
return solve_facial_transformation_matrix(
|
|
normalized, canonical_data["canonical_vertices"],
|
|
canonical_data["procrustes_indices"], canonical_data["procrustes_weights"],
|
|
image_width=image_width, image_height=image_height,
|
|
)
|