mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-07-03 21:20:49 +08:00
Align to Kimodo
This commit is contained in:
parent
ebaf0a71fa
commit
d754977178
@ -34,6 +34,7 @@ from comfy_extras.sam3d_body.utils import image_to_uint8
|
||||
|
||||
SAM3TrackData = io.Custom("SAM3_TRACK_DATA")
|
||||
MHRPoseData = io.Custom("MHR_POSE_DATA") # mhr_model_params, shape_params, expr_params, MHR70 keypoint layout, canonical_colors keyed to MHR mesh, hand_vert_mask from MHR LBS).
|
||||
KimodoPoseData = io.Custom("KIMODO_POSE_DATA") # external Y-up rig (ComfyUI-Kimodo); carries per-frame pred_vertices/pred_cam_t/canonical_colors so the mesh rasterizer is rig-agnostic.
|
||||
SAM3DBodyModel = io.Custom("SAM3D_BODY_MODEL")
|
||||
|
||||
# Loader
|
||||
@ -827,10 +828,18 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
def define_schema(cls):
|
||||
return io.Schema(
|
||||
node_id="SAM3DBody_Render",
|
||||
display_name="Render SAM3D Body",
|
||||
display_name="Render 3D Body Pose",
|
||||
search_aliases=["Render SAM3D Body", "sam3d render", "kimodo render"],
|
||||
category="image/detection",
|
||||
inputs=[
|
||||
MHRPoseData.Input("mhr_pose_data"),
|
||||
io.MultiType.Input(
|
||||
"pose_data", types=[MHRPoseData, KimodoPoseData],
|
||||
tooltip=(
|
||||
"MHR pose data, or external Y-up rig pose data (KimodoSample). "
|
||||
"All render styles work for external rigs that carry OpenPose "
|
||||
"joint maps in their _skeleton_override (KimodoSample does)."
|
||||
),
|
||||
),
|
||||
io.Image.Input(
|
||||
"background",
|
||||
optional=True,
|
||||
@ -882,11 +891,11 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
|
||||
|
||||
@classmethod
|
||||
def execute(cls, mhr_pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput:
|
||||
def execute(cls, pose_data, background=None, width=0, height=0, camera_info=None, render_style=None) -> io.NodeOutput:
|
||||
render_style = render_style or {"render_style": "mesh"}
|
||||
mode_key = render_style.get("render_style", "mesh")
|
||||
|
||||
native_H, native_W = mhr_pose_data["image_size"]
|
||||
native_H, native_W = pose_data["image_size"]
|
||||
new_W, new_H = int(width), int(height)
|
||||
if new_W == 0 and new_H == 0:
|
||||
H, W = native_H, native_W
|
||||
@ -896,14 +905,14 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
new_W = max(1, round(native_W * new_H / native_H))
|
||||
elif new_H == 0:
|
||||
new_H = max(1, round(native_H * new_W / native_W))
|
||||
mhr_pose_data = _scale_pose_data(mhr_pose_data, new_H, new_W)
|
||||
pose_data = _scale_pose_data(pose_data, new_H, new_W)
|
||||
H, W = new_H, new_W
|
||||
px_scale = min(new_W / native_W, new_H / native_H)
|
||||
|
||||
if camera_info is not None:
|
||||
mhr_pose_data = apply_camera_override(mhr_pose_data, camera_info, H, W)
|
||||
pose_data = apply_camera_override(pose_data, camera_info, H, W)
|
||||
|
||||
B = len(mhr_pose_data["frames"])
|
||||
B = len(pose_data["frames"])
|
||||
if B == 0:
|
||||
return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32))
|
||||
|
||||
@ -951,11 +960,11 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
region = str(render_style.get("region", "full_body"))
|
||||
|
||||
if region == "hands_only":
|
||||
hand_mask = mhr_pose_data["hand_vert_mask"]
|
||||
faces_full = np.asarray(mhr_pose_data["faces"])
|
||||
hand_mask = pose_data["hand_vert_mask"]
|
||||
faces_full = np.asarray(pose_data["faces"])
|
||||
keep = hand_mask[faces_full].all(axis=1)
|
||||
mhr_pose_data = dict(mhr_pose_data)
|
||||
mhr_pose_data["faces"] = np.ascontiguousarray(
|
||||
pose_data = dict(pose_data)
|
||||
pose_data["faces"] = np.ascontiguousarray(
|
||||
faces_full[keep], dtype=faces_full.dtype,
|
||||
)
|
||||
else: # silhouette — no shader/opacity controls, mask is binary
|
||||
@ -980,7 +989,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
bg_f = bg_t[min(f, bg_t.shape[0] - 1)]
|
||||
if mode_key == "openpose_2d":
|
||||
img = render_pose_data_openpose(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
pose_data, frame_idx=f, W=W, H=H,
|
||||
background=bg_f,
|
||||
composite=composite,
|
||||
marker_radius_px=marker_radius_px,
|
||||
@ -993,7 +1002,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
)
|
||||
elif mode_key == "openpose_3d":
|
||||
img = render_pose_data_capsules(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
pose_data, frame_idx=f, W=W, H=H,
|
||||
background=bg_f,
|
||||
composite=composite,
|
||||
radius_m=op3d_radius_m,
|
||||
@ -1005,7 +1014,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
elif mode_key == "scail":
|
||||
# SCAIL renders body as 3D capsules + 2D openpose hands on top
|
||||
img = render_pose_data_capsules(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
pose_data, frame_idx=f, W=W, H=H,
|
||||
background=bg_f,
|
||||
composite=composite,
|
||||
radius_m=cap_radius_m,
|
||||
@ -1017,7 +1026,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
scail_overlay_px = max(1, int(round(4 * px_scale)))
|
||||
scail_face_px = max(1, int(round(1 * px_scale)))
|
||||
img = render_pose_data_openpose(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
pose_data, frame_idx=f, W=W, H=H,
|
||||
background=img,
|
||||
composite="over",
|
||||
include_body=False,
|
||||
@ -1031,7 +1040,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
)
|
||||
else:
|
||||
img = render_pose_data(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
pose_data, frame_idx=f, W=W, H=H,
|
||||
background=bg_f, composite=composite, opacity=opacity,
|
||||
shader_preset=shader_key,
|
||||
rainbow_tilt_x_deg=rainbow_tilt_x,
|
||||
|
||||
@ -684,8 +684,13 @@ class BuildPoseFile(IO.ComfyNode):
|
||||
fmt = format.get("format", "glb")
|
||||
|
||||
if fmt == "bvh":
|
||||
if sam3d_body_model is None:
|
||||
raise ValueError("Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input.")
|
||||
# External rigs (e.g. Kimodo) supply pose_data["_skeleton_override"]
|
||||
has_external_rig = isinstance(pose_data, dict) and ("_skeleton_override" in pose_data)
|
||||
if sam3d_body_model is None and not has_external_rig:
|
||||
raise ValueError(
|
||||
"Create 3D Animation: 'bvh' format needs the `sam3d_body_model` input OR a "
|
||||
"`_skeleton_override` dict in pose_data (e.g. from KimodoSample)."
|
||||
)
|
||||
# BVH carries one skeleton; -1 (all tracks) collapses to the first.
|
||||
ti = int(track_index)
|
||||
if ti < 0:
|
||||
|
||||
@ -16,10 +16,9 @@ from typing import Any, Dict, List
|
||||
import numpy as np
|
||||
|
||||
from .glb_shared import (
|
||||
bind_skel_state,
|
||||
Rig,
|
||||
bone_locals_from_globals,
|
||||
collect_tracks,
|
||||
extract_rig_static,
|
||||
global_skel_state_from_pose_data,
|
||||
quat_sign_fix_per_joint,
|
||||
unflip,
|
||||
@ -49,9 +48,14 @@ def _quat_to_zxy_euler_deg(quat: np.ndarray) -> np.ndarray:
|
||||
return out.astype(np.float32)
|
||||
|
||||
|
||||
def _find_bvh_root(parents: np.ndarray) -> int:
|
||||
def _find_bvh_root(parents: np.ndarray, is_external: bool = False) -> int:
|
||||
"""First child of the rig's world anchor so the static origin→body stick
|
||||
bone gets left out. Falls back to the first root joint."""
|
||||
bone gets left out. Falls back to the first root joint.
|
||||
|
||||
MHR's joint 0 is a static world anchor whose single child is the pelvis, so
|
||||
skipping it is correct. External rigs (e.g. SOMA-77) whose root is already
|
||||
the articulated body root with multiple child chains must keep the root —
|
||||
descending into one child would drop the sibling limbs from the BVH."""
|
||||
NJ = parents.shape[0]
|
||||
world_anchors = [j for j in range(NJ)
|
||||
if not (0 <= int(parents[j]) < NJ and int(parents[j]) != j)]
|
||||
@ -64,6 +68,8 @@ def _find_bvh_root(parents: np.ndarray) -> int:
|
||||
children[p].append(j)
|
||||
wa = world_anchors[0]
|
||||
if children[wa]:
|
||||
if is_external and len(children[wa]) > 1:
|
||||
return wa
|
||||
return children[wa][0]
|
||||
return wa
|
||||
|
||||
@ -80,7 +86,7 @@ def _build_children_map(parents: np.ndarray) -> List[List[int]]:
|
||||
|
||||
def build_bvh(
|
||||
pose_data: Dict[str, Any],
|
||||
model: Any,
|
||||
model: Any = None,
|
||||
*,
|
||||
fps: float = 24.0,
|
||||
camera_translation: str = "off",
|
||||
@ -89,6 +95,10 @@ def build_bvh(
|
||||
) -> bytes:
|
||||
"""Build a BVH file from pose_data. Returns UTF-8 encoded text bytes.
|
||||
|
||||
`model` may be None when pose_data carries a `_skeleton_override` (external
|
||||
rigs, e.g. Kimodo); the rig hierarchy/offsets/bind are read from the
|
||||
override instead of the MHR model.
|
||||
|
||||
`units` is "cm" (default, standard mocap convention) or "m". Affects the
|
||||
OFFSET and root-position values; rotations are independent of units.
|
||||
"""
|
||||
@ -96,9 +106,10 @@ def build_bvh(
|
||||
raise ValueError(f"build_bvh: units must be 'cm' or 'm', got {units!r}")
|
||||
unit_scale = 100.0 if units == "cm" else 1.0
|
||||
|
||||
rig_static = extract_rig_static(model)
|
||||
NJ = int(rig_static["num_joints"])
|
||||
parents = rig_static["parents"]
|
||||
rig = Rig.from_pose_data(pose_data, model)
|
||||
is_external = not rig.can_rerun_fk
|
||||
NJ = rig.num_joints
|
||||
parents = rig.parents
|
||||
frames = pose_data["frames"]
|
||||
|
||||
tracks = collect_tracks(pose_data, track_index)
|
||||
@ -109,16 +120,16 @@ def build_bvh(
|
||||
if n_frames == 0:
|
||||
raise ValueError("build_bvh: track has zero frames")
|
||||
|
||||
body_root = _find_bvh_root(parents)
|
||||
body_root = _find_bvh_root(parents, is_external)
|
||||
children_map = _build_children_map(parents)
|
||||
|
||||
# Bone OFFSETs come from MHR's translation_offsets (joint position
|
||||
# relative to parent in parent's local-bind frame). For the BVH root,
|
||||
# we use its bind world position so the skeleton sits at the right
|
||||
# spot when imported.
|
||||
bind_global = bind_skel_state(model) # (NJ, 8) cm
|
||||
bind_global = rig.bind_global_cm # (NJ, 8) cm
|
||||
bind_pos_m = bind_global[:, :3].astype(np.float64) * 0.01 # (NJ, 3) m
|
||||
offset_m = rig_static["joint_translation_offsets"].astype(np.float64) * 0.01
|
||||
offset_m = rig.joint_offsets_cm.astype(np.float64) * 0.01
|
||||
|
||||
# DFS order rooted at body_root — matches per-frame channel order.
|
||||
bvh_order: List[int] = []
|
||||
@ -133,6 +144,7 @@ def build_bvh(
|
||||
# treated as the hierarchy root in BVH-space.
|
||||
rig_global_m = global_skel_state_from_pose_data(
|
||||
pose_data, frame_indices, person_k, NJ,
|
||||
joint_coords_y_down=rig.per_frame_y_down,
|
||||
)
|
||||
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7])
|
||||
bvh_parents = parents.copy()
|
||||
|
||||
@ -18,13 +18,11 @@ import comfy.model_management
|
||||
|
||||
from .glb_shared import (
|
||||
OPENPOSE_18_PAIRS,
|
||||
OPENPOSE18_TO_MHR70,
|
||||
OPENPOSE_RAINBOW_18,
|
||||
SCAIL_LIMB_COLORS_17,
|
||||
OPENPOSE_HAND_PAIRS,
|
||||
OPENPOSE_HAND21_TO_MHR70_R,
|
||||
OPENPOSE_HAND21_TO_MHR70_L,
|
||||
OPENPOSE_HAND_COLORS_21,
|
||||
openpose_render_keypoints,
|
||||
)
|
||||
|
||||
|
||||
@ -37,6 +35,7 @@ def _limb_palette_rgb01(palette: str) -> np.ndarray:
|
||||
|
||||
def _build_specs_from_pose(
|
||||
persons: List[Dict[str, Any]],
|
||||
pose_data: Dict[str, Any],
|
||||
*,
|
||||
include_hands: bool,
|
||||
palette: str,
|
||||
@ -61,17 +60,14 @@ def _build_specs_from_pose(
|
||||
falloff = max(0.0, min(1.0, float(person_brightness_falloff)))
|
||||
|
||||
for k, person in enumerate(persons):
|
||||
kp2d_full = person.get("pred_keypoints_3d")
|
||||
cam_t = person.get("pred_cam_t")
|
||||
if kp2d_full is None or cam_t is None:
|
||||
continue
|
||||
kp = np.asarray(kp2d_full, dtype=np.float32)
|
||||
if kp.ndim != 2 or kp.shape[1] != 3 or kp.shape[0] < 70:
|
||||
body_op = openpose_render_keypoints(person, pose_data, "body", dim=3)
|
||||
if body_op is None or cam_t is None:
|
||||
continue
|
||||
cam_t_np = np.asarray(cam_t, dtype=np.float32).reshape(3)
|
||||
# pred_keypoints_3d is camera frame (Y-down post-flip); add cam_t to
|
||||
# place the subject in front of the camera.
|
||||
kp_cam = kp + cam_t_np[None, :]
|
||||
# op-keypoints are camera frame (Y-down); add cam_t to place the
|
||||
# subject in front of the camera.
|
||||
body_kp = body_op + cam_t_np[None, :]
|
||||
|
||||
pastel = 0.0 if k == 0 else (1.0 - falloff ** k)
|
||||
|
||||
@ -83,7 +79,6 @@ def _build_specs_from_pose(
|
||||
# SCAIL skips face bones (13..16) and redirects limb 12 into a short
|
||||
# head stub blending spine + neck→nose direction.
|
||||
body_limb_count = 13 if palette == "scail" else len(OPENPOSE_18_PAIRS)
|
||||
body_kp = kp_cam[OPENPOSE18_TO_MHR70]
|
||||
spine_dir = None
|
||||
if palette == "scail":
|
||||
mid_hip = 0.5 * (body_kp[8] + body_kp[11]) # 8=RHip, 11=LHip
|
||||
@ -117,10 +112,11 @@ def _build_specs_from_pose(
|
||||
dtype=np.float32))
|
||||
|
||||
if include_hands:
|
||||
r_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_R]
|
||||
l_kp = kp_cam[OPENPOSE_HAND21_TO_MHR70_L]
|
||||
hand_ops = [openpose_render_keypoints(person, pose_data, p, dim=3)
|
||||
for p in ("hand_r", "hand_l")]
|
||||
hand_kps = [h + cam_t_np[None, :] for h in hand_ops if h is not None]
|
||||
for limb_i, (a, b) in enumerate(OPENPOSE_HAND_PAIRS):
|
||||
for hand_kp in (r_kp, l_kp):
|
||||
for hand_kp in hand_kps:
|
||||
sa, sb = hand_kp[a], hand_kp[b]
|
||||
if not (np.all(np.isfinite(sa)) and np.all(np.isfinite(sb))):
|
||||
continue
|
||||
@ -380,7 +376,7 @@ def render_pose_data_capsules(
|
||||
cx, cy = W * 0.5, H * 0.5
|
||||
|
||||
starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose(
|
||||
persons, include_hands=include_hands, palette=palette,
|
||||
persons, pose_data, include_hands=include_hands, palette=palette,
|
||||
person_brightness_falloff=person_brightness_falloff,
|
||||
)
|
||||
|
||||
|
||||
@ -40,6 +40,7 @@ from .glb_shared import (
|
||||
gaussian_smooth_positions,
|
||||
make_lit_material,
|
||||
quat_sign_fix_per_joint,
|
||||
resolve_openpose_keypoints_from_joints,
|
||||
rotation_align,
|
||||
rotmat_to_quat_np,
|
||||
select_face_landmark_vert_ids,
|
||||
@ -109,7 +110,7 @@ def _openpose_bind_at_rig_rest(
|
||||
rest_pos = np.asarray(override["bind_global_m"], dtype=np.float32)[:, :3]
|
||||
op18_w = override.get("openpose18_joint_weights")
|
||||
parts: List[np.ndarray] = [
|
||||
_resolve_openpose_keypoints_from_joints(
|
||||
resolve_openpose_keypoints_from_joints(
|
||||
rest_pos, np.asarray(op18, dtype=np.int64),
|
||||
weights=None if op18_w is None else np.asarray(op18_w, dtype=np.float32),
|
||||
)
|
||||
@ -121,11 +122,11 @@ def _openpose_bind_at_rig_rest(
|
||||
return None
|
||||
op21_r_w = override.get("openpose_hand21_r_joint_weights")
|
||||
op21_l_w = override.get("openpose_hand21_l_joint_weights")
|
||||
parts.append(_resolve_openpose_keypoints_from_joints(
|
||||
parts.append(resolve_openpose_keypoints_from_joints(
|
||||
rest_pos, np.asarray(op21_r, dtype=np.int64),
|
||||
weights=None if op21_r_w is None else np.asarray(op21_r_w, dtype=np.float32),
|
||||
))
|
||||
parts.append(_resolve_openpose_keypoints_from_joints(
|
||||
parts.append(resolve_openpose_keypoints_from_joints(
|
||||
rest_pos, np.asarray(op21_l, dtype=np.int64),
|
||||
weights=None if op21_l_w is None else np.asarray(op21_l_w, dtype=np.float32),
|
||||
))
|
||||
@ -137,39 +138,6 @@ def _openpose_bind_at_rig_rest(
|
||||
return np.concatenate(parts, axis=0).astype(np.float32)
|
||||
|
||||
|
||||
def _resolve_openpose_keypoints_from_joints(
    joints: np.ndarray, mapping: np.ndarray,
    weights: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Turn a `(K, 2)` joint-index table into `K` keypoint positions.

    `joints` is the per-frame joint-position array that the table indexes
    into along its first axis.

    Each table row `(a, b)` yields one keypoint:

    - `b == -1`: the keypoint is `joints[a]` itself; any weight supplied for
      that row is ignored.
    - `b >= 0`: the keypoint is `w * joints[a] + (1 - w) * joints[b]`.
      With `weights=None` every such row uses `w = 0.5` (plain midpoint, e.g.
      Nose ≈ midpoint of the eyes). An explicit `w` outside [0, 1]
      EXTRAPOLATES beyond the segment, which is how landmarks with no rig
      joint and no bracketing joint pair are approximated (Ears ≈
      RightEye + (RightEye − LeftEye), i.e. `w = 2.0`).

    The result is always float32.
    """
    primary = mapping[:, 0].astype(np.int64)
    secondary = mapping[:, 1].astype(np.int64)
    primary_pos = joints[primary]
    is_blend = secondary >= 0

    if is_blend.any():
        # Rows without a partner are pointed back at their own primary joint
        # so the gather stays in range; their weight of 1.0 makes it a no-op.
        secondary_pos = joints[np.where(is_blend, secondary, primary)]
        if weights is not None:
            primary_w = np.where(is_blend, np.asarray(weights, dtype=np.float32), 1.0)
        else:
            primary_w = np.where(is_blend, 0.5, 1.0).astype(np.float32)
        secondary_w = (1.0 - primary_w).astype(np.float32)
        blended = primary_pos * primary_w[:, None] + secondary_pos * secondary_w[:, None]
        return blended.astype(np.float32, copy=False)

    # Pure lookup table: nothing to blend.
    return primary_pos.astype(np.float32, copy=False)
|
||||
|
||||
|
||||
def _extract_openpose_keypoints(
|
||||
pose_data: Dict[str, Any], frame_indices: List[int], person_k: int,
|
||||
) -> np.ndarray:
|
||||
@ -219,7 +187,7 @@ def _extract_openpose_keypoints(
|
||||
f"missing at frame={t}, track={person_k}."
|
||||
)
|
||||
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
|
||||
out[t_idx] = _resolve_openpose_keypoints_from_joints(
|
||||
out[t_idx] = resolve_openpose_keypoints_from_joints(
|
||||
joints, op18, weights=op18_w,
|
||||
)
|
||||
return out
|
||||
@ -306,10 +274,10 @@ def _extract_openpose_hand_keypoints(
|
||||
"per-frame `pred_joint_coords` for hands."
|
||||
)
|
||||
joints = np.asarray(person["pred_joint_coords"], dtype=np.float32)
|
||||
out[t_idx, :21] = _resolve_openpose_keypoints_from_joints(
|
||||
out[t_idx, :21] = resolve_openpose_keypoints_from_joints(
|
||||
joints, op21_r, weights=op21_r_w,
|
||||
)
|
||||
out[t_idx, 21:] = _resolve_openpose_keypoints_from_joints(
|
||||
out[t_idx, 21:] = resolve_openpose_keypoints_from_joints(
|
||||
joints, op21_l, weights=op21_l_w,
|
||||
)
|
||||
return out
|
||||
|
||||
@ -16,6 +16,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import struct
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
@ -505,9 +506,15 @@ def extract_rig_static(model: Any, pose_data: Optional[Dict[str, Any]] = None) -
|
||||
# so we don't need MHR's PCA pose / expression bases.
|
||||
parents = np.asarray(override["parents"], dtype=np.int32)
|
||||
rest_v = np.asarray(override["rest_verts_m"], dtype=np.float32)
|
||||
# BVH needs parent-relative bone OFFSETs (cm). MHR ships these directly;
|
||||
# external rigs only give bind globals, so derive locals from them.
|
||||
bind_global_m = np.asarray(override["bind_global_m"], dtype=np.float32)
|
||||
local_bind = bone_locals_from_globals(bind_global_m[None], parents)[0]
|
||||
joint_translation_offsets = (local_bind[:, :3] * 100.0).astype(np.float32)
|
||||
return {
|
||||
"parents": parents,
|
||||
"parents_pmi": parents,
|
||||
"joint_translation_offsets": joint_translation_offsets, # (NJ, 3) cm
|
||||
"lbs_compact_joints": np.asarray(override["lbs_compact_joints"], dtype=np.uint16),
|
||||
"lbs_compact_weights": np.asarray(override["lbs_compact_weights"], dtype=np.float32),
|
||||
"lbs_compact_max_inf": int(override.get("lbs_compact_max_inf", 4)),
|
||||
@ -737,6 +744,77 @@ def bind_skel_state(model: Any, pose_data: Optional[Dict[str, Any]] = None) -> n
|
||||
return global_skel_state_per_frame(model, zero_mp)[0]
|
||||
|
||||
|
||||
@dataclass
class Rig:
    """Source-agnostic static rig description used by the GLB/BVH exporters.

    Build one with `Rig.from_pose_data(pose_data, model)`. The data comes
    either from an MHR model or from an inline
    `pose_data["_skeleton_override"]` (external rigs, e.g. ComfyUI-Kimodo).

    Exporters read the fields below and never branch on where the rig came
    from. The single source-dependent operation is `rest_verts_m`: MHR rest
    verts depend on the subject's `shape_params`, while external rigs ship
    fixed rest verts.
    """
    parents: np.ndarray             # (NJ,) int32 parent index per joint, -1 = root
    joint_offsets_cm: np.ndarray    # (NJ, 3) bind offset relative to parent, cm
    bind_global_cm: np.ndarray      # (NJ, 8) bind global: [t cm | q xyzw | s]
    lbs_joints: np.ndarray          # (V, 8) uint16 compacted skin influences
    lbs_weights: np.ndarray         # (V, 8) f32 weights matching lbs_joints
    lbs_max_inf: int                # <= 8; callers can skip JOINTS_1 when <= 4
    faces: np.ndarray               # (F, 3) uint32
    num_joints: int
    num_verts: int
    num_expr: int                   # 0 means the rig has no face morphs
    per_frame_y_down: bool          # pred_joint_coords stored y-down (MHR) vs y-up (external)
    can_rerun_fk: bool              # True = per-frame FK can be re-run from mhr_model_params
    expr_basis: Optional[np.ndarray] = None       # (E, V, 3) cm, MHR only
    _model: Any = None
    _rest_override: Optional[np.ndarray] = None   # (V, 3) m, external only

    @property
    def bind_global_m(self) -> np.ndarray:
        """Copy of `bind_global_cm` with the translation columns in metres."""
        in_metres = self.bind_global_cm.astype(np.float32)
        in_metres = in_metres.copy()
        # Only columns 0..2 are the translation; the rest is passed through.
        in_metres[:, :3] *= 0.01
        return in_metres

    def rest_verts_m(self, shape_params: np.ndarray) -> np.ndarray:
        """Zero-pose rest verts (V, 3) in rig-native Y-up metres.

        Returns the stored override when there is one (`shape_params` is then
        unused); otherwise defers to `zero_pose_rest_verts` on the model.
        """
        if self._rest_override is None:
            return zero_pose_rest_verts(self._model, shape_params)
        return self._rest_override

    @classmethod
    def from_pose_data(cls, pose_data: Optional[Dict[str, Any]], model: Any = None) -> "Rig":
        """Assemble a `Rig` from `extract_rig_static(model, pose_data)`.

        A truthy `_external` flag in the static dict selects the override
        path: skinning arrives pre-compacted, the rest verts are fixed, there
        is no expression basis, and FK cannot be re-run. Otherwise the sparse
        skin is compacted to at most 8 influences per vertex and per-frame
        joint coords are treated as y-down.
        """
        static = extract_rig_static(model, pose_data)
        is_external = bool(static.get("_external", False))

        if not is_external:
            skin_joints, skin_weights, skin_max_inf = compact_skin_to_n(
                static["lbs_skin_indices"], static["lbs_vert_indices"],
                static["lbs_skin_weights"], int(static["num_verts"]), max_inf=8,
            )
            y_down = True
            fixed_rest = None
            morph_basis = None
            if int(static["num_expr"]) > 0:
                morph_basis = static["expr_basis"]
        else:
            skin_joints = np.asarray(static["lbs_compact_joints"], dtype=np.uint16)
            skin_weights = np.asarray(static["lbs_compact_weights"], dtype=np.float32)
            skin_max_inf = int(static["lbs_compact_max_inf"])
            override = _get_skeleton_override(pose_data) or {}
            # Joint coords count as y-up unless the override says otherwise.
            y_down = bool(override.get("per_frame_y_down", False))
            fixed_rest = np.asarray(override["rest_verts_m"], dtype=np.float32)
            morph_basis = None

        return cls(
            parents=np.asarray(static["parents"], dtype=np.int32),
            joint_offsets_cm=np.asarray(static["joint_translation_offsets"], dtype=np.float32),
            bind_global_cm=np.asarray(bind_skel_state(model, pose_data), dtype=np.float32),
            lbs_joints=skin_joints,
            lbs_weights=skin_weights,
            lbs_max_inf=skin_max_inf,
            faces=np.asarray(static["faces"], dtype=np.uint32),
            num_joints=int(static["num_joints"]),
            num_verts=int(static["num_verts"]),
            num_expr=int(static["num_expr"]),
            per_frame_y_down=y_down,
            can_rerun_fk=not is_external,
            expr_basis=morph_basis,
            _model=model,
            _rest_override=fixed_rest,
        )
|
||||
|
||||
|
||||
def ibp_from_bind_global(bind_skel_state_m: np.ndarray) -> np.ndarray:
|
||||
"""Inverse-bind MAT4 by inverting the rig's bind global (meters). Guarantees
|
||||
IBP[j] = inverse(FK over bind local TRS) — exactly what glTF skinning
|
||||
@ -1069,6 +1147,87 @@ DWPOSE_HAND_COLORS_21 = np.tile(
|
||||
)
|
||||
|
||||
|
||||
def resolve_openpose_keypoints_from_joints(
    joints: np.ndarray, mapping: np.ndarray, weights: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Resolve a (K, 2) joint-index map against (J, D) joint positions -> (K, D).

    For a map row (a, b):
      * b == -1 -> the keypoint is joints[a] (any weight for that row is unused);
      * b >= 0  -> the keypoint is w*joints[a] + (1-w)*joints[b], where w comes
        from `weights` or defaults to 0.5 (midpoint). A w outside [0, 1]
        extrapolates past the segment.

    The returned array is float32.
    """
    idx_a = mapping[:, 0].astype(np.int64)
    idx_b = mapping[:, 1].astype(np.int64)
    at_a = joints[idx_a]
    paired = idx_b >= 0

    if paired.any():
        # Unpaired rows re-read joint `a` so the index stays valid; their
        # weight of 1.0 gives the second term a zero factor.
        at_b = joints[np.where(paired, idx_b, idx_a)]
        if weights is not None:
            weight_a = np.where(paired, np.asarray(weights, dtype=np.float32), 1.0)
        else:
            weight_a = np.where(paired, 0.5, 1.0).astype(np.float32)
        weight_b = (1.0 - weight_a).astype(np.float32)
        mixed = at_a * weight_a[:, None] + at_b * weight_b[:, None]
        return mixed.astype(np.float32, copy=False)

    # Every row is a direct lookup.
    return at_a.astype(np.float32, copy=False)
|
||||
|
||||
|
||||
# Lookup used by `openpose_render_keypoints`, keyed by body part. Each value is
#   (override key holding the joint-index map,
#    override key holding the optional blend weights,
#    MHR70 reindex map used when the override has no joint-index map).
_OPENPOSE_RENDER_MAPS = {
    "body": (
        "openpose18_joint_indices",
        "openpose18_joint_weights",
        OPENPOSE18_TO_MHR70,
    ),
    "hand_r": (
        "openpose_hand21_r_joint_indices",
        "openpose_hand21_r_joint_weights",
        OPENPOSE_HAND21_TO_MHR70_R,
    ),
    "hand_l": (
        "openpose_hand21_l_joint_indices",
        "openpose_hand21_l_joint_weights",
        OPENPOSE_HAND21_TO_MHR70_L,
    ),
}
|
||||
|
||||
|
||||
def openpose_render_keypoints(
    person: Dict[str, Any], pose_data: Optional[Dict[str, Any]], part: str,
    *, dim: int, H: int = 0, W: int = 0,
) -> Optional[np.ndarray]:
    """OpenPose keypoints for one person, in op-layout, CAMERA frame (Y-down).

    Args:
        person: per-person dict for one frame. Keys read here:
            `pred_joint_coords`, `pred_cam_t`, `focal_length` (external rigs)
            or `pred_keypoints_3d` / `pred_keypoints_2d` (MHR).
        pose_data: pose payload; its skeleton override (if any) decides which
            of the two paths below is taken.
        part: one of 'body', 'hand_r', 'hand_l'. Any other value raises
            `KeyError` from the lookup table.
        dim: 3 -> (K, 3) metres, BEFORE `pred_cam_t` is added;
            any other value -> (K, 2) image pixels.
        H, W: image size in pixels; only used by the external-rig 2D
            projection, where the principal point is the image centre.

    Returns:
        The keypoint array, or None when the source data is missing or does
        not have the expected layout.

    External rigs (override carries the joint-index map) resolve from per-frame
    `pred_joint_coords` (rig-native Y-up -> flipped to camera Y-down, matching
    the pred_vertices convention). MHR reindexes the stored
    `pred_keypoints_{3d,2d}` via the MHR70 map.
    """
    map_key, w_key, mhr_map = _OPENPOSE_RENDER_MAPS[part]
    override = _get_skeleton_override(pose_data)
    ext_map = override.get(map_key) if override is not None else None

    if ext_map is not None:
        joints = person.get("pred_joint_coords")
        if joints is None:
            return None
        w = override.get(w_key)
        # Work on a private copy: the axis flips below are done in place.
        kp3d = resolve_openpose_keypoints_from_joints(
            np.asarray(joints, dtype=np.float32),
            np.asarray(ext_map, dtype=np.int64),
            None if w is None else np.asarray(w, dtype=np.float32),
        ).copy()
        kp3d[:, 1] *= -1.0  # rig-native Y-up -> camera Y-down
        kp3d[:, 2] *= -1.0
        if dim == 3:
            return kp3d

        # 2D: pinhole-project the camera-space points into pixel coordinates.
        cam_t = person.get("pred_cam_t")
        focal = person.get("focal_length")
        if cam_t is None or focal is None:
            return None
        pts3 = kp3d + np.asarray(cam_t, dtype=np.float32).reshape(1, 3)
        # Clamp depth so points at or behind the camera never divide by <= 0.
        z = np.maximum(pts3[:, 2:3], 1e-6)
        # Only the first focal entry is used.
        f = float(np.asarray(focal, dtype=np.float32).reshape(-1)[0])
        xy = pts3[:, :2] * f + np.array([W * 0.5, H * 0.5], dtype=np.float32)[None, :] * z
        return (xy / z).astype(np.float32)

    key = "pred_keypoints_3d" if dim == 3 else "pred_keypoints_2d"
    kp_full = person.get(key)
    if kp_full is None:
        return None
    kp_full = np.asarray(kp_full, dtype=np.float32)
    # The reindex maps address the 70-entry MHR70 layout.
    if kp_full.ndim != 2 or kp_full.shape[0] < 70:
        return None
    # 3D callers add a (1, 3) cam_t to the result, so anything that is not
    # exactly xyz is reported as missing rather than failing at that add.
    if dim == 3 and kp_full.shape[1] != 3:
        return None
    return kp_full[mhr_map]
|
||||
|
||||
|
||||
# Face landmarks from the MHR rig (option `face_source="rig"`).
|
||||
# MHR has no face bones — face deforms via expr_params morphs — so landmarks
|
||||
# are sourced from `pred_vertices` at fixed vertex IDs picked by NN against
|
||||
|
||||
@ -24,14 +24,12 @@ import numpy as np
|
||||
|
||||
from .glb_shared import (
|
||||
GLBWriter,
|
||||
Rig,
|
||||
bake_vertex_colors,
|
||||
bind_skel_state,
|
||||
bone_locals_from_globals,
|
||||
collect_tracks,
|
||||
compact_skin_to_n,
|
||||
compute_normals,
|
||||
compute_pastel_mix,
|
||||
extract_rig_static,
|
||||
flat_shade_mesh,
|
||||
gaussian_smooth_quats,
|
||||
global_skel_state_from_pose_data,
|
||||
@ -41,7 +39,6 @@ from .glb_shared import (
|
||||
quat_sign_fix_per_joint,
|
||||
rotation_align,
|
||||
unflip,
|
||||
zero_pose_rest_verts,
|
||||
)
|
||||
|
||||
from comfy_extras.sam3d_body.utils import jet_colormap
|
||||
@ -220,30 +217,22 @@ def build_glb_skeletal(
|
||||
if not tracks:
|
||||
raise ValueError("build_glb_skeletal: no valid tracks in pose_data")
|
||||
|
||||
rig_static = extract_rig_static(model, pose_data)
|
||||
NJ = rig_static["num_joints"]
|
||||
NV = rig_static["num_verts"]
|
||||
NEXPR = rig_static["num_expr"]
|
||||
parents = rig_static["parents"]
|
||||
is_external = bool(rig_static.get("_external", False))
|
||||
if is_external:
|
||||
rig = Rig.from_pose_data(pose_data, model)
|
||||
NJ = rig.num_joints
|
||||
NV = rig.num_verts
|
||||
NEXPR = rig.num_expr
|
||||
parents = rig.parents
|
||||
if not rig.can_rerun_fk:
|
||||
# External rigs have no PCA pose params to re-run; only stored globals
|
||||
# are available, and kimodo stores joint coords already Y-up.
|
||||
# are available, and they store joint coords already Y-up.
|
||||
use_stored_global_rots = True
|
||||
joint_coords_y_down = not is_external
|
||||
# Compact sparse skinning to 8 influences per vertex into glTF's two
|
||||
# JOINTS_*/WEIGHTS_* sets. MHR averages ~2.8 influences/vert but some
|
||||
# shoulder/hip verts have 5-8 where multiple joints cancel — keeping only
|
||||
# 4 there leaks per-bone rotation noise into the rendered mesh.
|
||||
if is_external:
|
||||
joints_8 = rig_static["lbs_compact_joints"]
|
||||
weights_8 = rig_static["lbs_compact_weights"]
|
||||
actual_max_inf = rig_static["lbs_compact_max_inf"]
|
||||
else:
|
||||
joints_8, weights_8, actual_max_inf = compact_skin_to_n(
|
||||
rig_static["lbs_skin_indices"], rig_static["lbs_vert_indices"],
|
||||
rig_static["lbs_skin_weights"], NV, max_inf=8,
|
||||
)
|
||||
joint_coords_y_down = rig.per_frame_y_down
|
||||
# Skinning is already compacted to ≤8 influences per vertex (MHR averages
|
||||
# ~2.8 but some shoulder/hip verts hit 5-8; keeping only 4 there leaks
|
||||
# per-bone rotation noise into the rendered mesh).
|
||||
joints_8 = rig.lbs_joints
|
||||
weights_8 = rig.lbs_weights
|
||||
actual_max_inf = rig.lbs_max_inf
|
||||
joints_set0 = np.ascontiguousarray(joints_8[:, :4])
|
||||
weights_set0 = np.ascontiguousarray(weights_8[:, :4])
|
||||
use_set1 = actual_max_inf > 4
|
||||
@ -252,10 +241,8 @@ def build_glb_skeletal(
|
||||
# Derive bone locals from the rig's bind globals rather than recomputing
|
||||
# FK ourselves, so any mismatch between `parents` and the rig's actual FK
|
||||
# is absorbed into the local TRS instead of producing wrong globals.
|
||||
bind_global_cm = bind_skel_state(model, pose_data)
|
||||
bind_global_m = bind_global_cm.copy().astype(np.float32)
|
||||
bind_global_m[:, :3] *= 0.01
|
||||
bind_local = bone_locals_from_globals(bind_global_m[None], rig_static["parents"])[0]
|
||||
bind_global_m = rig.bind_global_m
|
||||
bind_local = bone_locals_from_globals(bind_global_m[None], parents)[0]
|
||||
|
||||
# IBP = inverse of bind global. With bone defaults set to bind_local and
|
||||
# FK composed via `parents`, skin_matrix at rest = identity.
|
||||
@ -280,7 +267,7 @@ def build_glb_skeletal(
|
||||
|
||||
expr_morph_accs: List[int] = []
|
||||
if include_face_morphs and NEXPR > 0:
|
||||
eb = rig_static["expr_basis"].astype(np.float32) * 0.01
|
||||
eb = rig.expr_basis.astype(np.float32) * 0.01
|
||||
for e in range(NEXPR):
|
||||
expr_morph_accs.append(w.add_vec3_f32_no_minmax(eb[e]))
|
||||
|
||||
@ -329,16 +316,14 @@ def build_glb_skeletal(
|
||||
body_mesh_node_idx: Optional[int] = None
|
||||
|
||||
if include_body:
|
||||
# External rigs have no PCA shape — `zero_pose_rest_verts` short-
|
||||
# circuits to `pose_data["_skeleton_override"]["rest_verts_m"]`,
|
||||
# so zeroed shape_params is safe there.
|
||||
if is_external:
|
||||
shape_params_arr = np.zeros(0, dtype=np.float32)
|
||||
else:
|
||||
shape_params_arr = np.asarray(
|
||||
frames[frame_indices[0]][person_k]["shape_params"], dtype=np.float32,
|
||||
)
|
||||
rest_v = zero_pose_rest_verts(model, shape_params_arr, pose_data=pose_data)
|
||||
# MHR rest verts depend on the subject's shape_params; external rigs
|
||||
# ship fixed rest verts and ignore the arg (so the empty external
|
||||
# `shape_params` is harmless).
|
||||
shape_params_arr = np.asarray(
|
||||
frames[frame_indices[0]][person_k].get("shape_params", []),
|
||||
dtype=np.float32,
|
||||
)
|
||||
rest_v = rig.rest_verts_m(shape_params_arr)
|
||||
normals = compute_normals(rest_v, faces_native)
|
||||
positions_acc = w.add_vec3_f32(rest_v)
|
||||
normals_acc = w.add_vec3_f32(normals)
|
||||
@ -393,7 +378,7 @@ def build_glb_skeletal(
|
||||
color_idx_per_vert: Optional[np.ndarray] = None
|
||||
hw = float(bone_vis_radius_m)
|
||||
bv_v, bv_n, bv_f, bv_j, bv_w, child_per_vert = _build_bone_octahedrons_mesh(
|
||||
bind_global_m[:, :3], rig_static["parents"], half_width_m=hw,
|
||||
bind_global_m[:, :3], parents, half_width_m=hw,
|
||||
)
|
||||
if bv_v.shape[0] > 0:
|
||||
F = bv_f.shape[0]
|
||||
@ -458,7 +443,7 @@ def build_glb_skeletal(
|
||||
# local translation (t_local inherits parent sign via q_parent_inv)
|
||||
# and produces visible "axis resets" mid-animation.
|
||||
rig_global_m[..., 3:7] = quat_sign_fix_per_joint(rig_global_m[..., 3:7])
|
||||
bone_local_anim = bone_locals_from_globals(rig_global_m, rig_static["parents"])
|
||||
bone_local_anim = bone_locals_from_globals(rig_global_m, parents)
|
||||
local_t = bone_local_anim[..., :3].astype(np.float32)
|
||||
local_q = bone_local_anim[..., 3:7].astype(np.float32)
|
||||
local_s = bone_local_anim[..., 7].astype(np.float32)
|
||||
|
||||
@ -19,10 +19,8 @@ from PIL import Image
|
||||
from comfy_extras.pose.keypoint_draw import KeypointDraw
|
||||
|
||||
from .glb_shared import (
|
||||
OPENPOSE18_TO_MHR70,
|
||||
OPENPOSE_HAND21_TO_MHR70_L,
|
||||
OPENPOSE_HAND21_TO_MHR70_R,
|
||||
OPENPOSE_HAND_COLORS_21,
|
||||
openpose_render_keypoints,
|
||||
select_face_landmark_vert_ids,
|
||||
)
|
||||
|
||||
@ -53,32 +51,31 @@ def _project_face_landmarks_2d(
|
||||
|
||||
|
||||
def _pack_dwpose_134(
|
||||
person: Dict[str, Any], *, include_body: bool, include_hands: bool,
|
||||
person: Dict[str, Any], pose_data: Dict[str, Any], *,
|
||||
include_body: bool, include_hands: bool, H: int, W: int,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Pack a SAM3D person dict into (kp, scores): (134, 2) DWPose-layout
|
||||
coords + (134,) confidence. Face slot (24-91) is left zeroed; face dots
|
||||
are drawn separately so SAM3D's 238-sapiens / rig-fallback counts work.
|
||||
Non-finite or out-of-band entries get score=0 and are filtered downstream."""
|
||||
Non-finite or out-of-band entries get score=0 and are filtered downstream.
|
||||
|
||||
Keypoints come from the shared provider: MHR reindexes `pred_keypoints_2d`,
|
||||
external rigs (Kimodo) resolve + project from `pred_joint_coords`."""
|
||||
kp = np.zeros((134, 2), dtype=np.float32)
|
||||
scores = np.zeros(134, dtype=np.float32)
|
||||
|
||||
kp2d_full = person.get("pred_keypoints_2d")
|
||||
if kp2d_full is None:
|
||||
return kp, scores
|
||||
kp2d = np.asarray(kp2d_full, dtype=np.float32)
|
||||
if kp2d.ndim != 2 or kp2d.shape[1] != 2 or kp2d.shape[0] < 70:
|
||||
return kp, scores
|
||||
|
||||
if include_body:
|
||||
body_xy = kp2d[OPENPOSE18_TO_MHR70]
|
||||
finite = np.isfinite(body_xy).all(axis=1)
|
||||
kp[:18][finite] = body_xy[finite]
|
||||
scores[:18][finite] = 1.0
|
||||
body_xy = openpose_render_keypoints(person, pose_data, "body", dim=2, H=H, W=W)
|
||||
if body_xy is not None:
|
||||
finite = np.isfinite(body_xy).all(axis=1)
|
||||
kp[:18][finite] = body_xy[finite]
|
||||
scores[:18][finite] = 1.0
|
||||
|
||||
if include_hands:
|
||||
for slot_start, mhr_idx in ((92, OPENPOSE_HAND21_TO_MHR70_R),
|
||||
(113, OPENPOSE_HAND21_TO_MHR70_L)):
|
||||
hand_xy = kp2d[mhr_idx]
|
||||
for slot_start, part in ((92, "hand_r"), (113, "hand_l")):
|
||||
hand_xy = openpose_render_keypoints(person, pose_data, part, dim=2, H=H, W=W)
|
||||
if hand_xy is None:
|
||||
continue
|
||||
finite = np.isfinite(hand_xy).all(axis=1)
|
||||
kp[slot_start:slot_start + 21][finite] = hand_xy[finite]
|
||||
scores[slot_start:slot_start + 21][finite] = 1.0
|
||||
@ -190,7 +187,8 @@ def render_pose_data_openpose(
|
||||
pre = canvas.copy() if pastel > 0 else None
|
||||
|
||||
kp134, scores134 = _pack_dwpose_134(
|
||||
person, include_body=include_body, include_hands=include_hands,
|
||||
person, pose_data, include_body=include_body,
|
||||
include_hands=include_hands, H=H, W=W,
|
||||
)
|
||||
_KD.draw_wholebody_keypoints(
|
||||
canvas, kp134, scores=scores134, threshold=0.5,
|
||||
|
||||
@ -178,6 +178,12 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str,
|
||||
|
||||
center = np.array([W * 0.5, H * 0.5], dtype=np.float32)
|
||||
reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"}
|
||||
# External rigs (e.g. Kimodo) store pred_joint_coords rig-native Y-up; the
|
||||
# render openpose/scail keypoint provider resolves from them and flips Y/Z.
|
||||
# Transform them through the camera too (in camera space, then back to Y-up)
|
||||
# so those keypoints follow the override instead of staying in the old frame.
|
||||
override = mhr_pose_data.get("_skeleton_override")
|
||||
joints_y_up = override is not None and not bool(override.get("per_frame_y_down", False))
|
||||
new_frames: List[List[Dict[str, Any]]] = []
|
||||
for frame in mhr_pose_data["frames"]:
|
||||
scaled = []
|
||||
@ -197,6 +203,17 @@ def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str,
|
||||
if k in reproj: # re-project the new 3D to 2D image coords
|
||||
z = np.maximum(cam[..., 2:3], 1e-6)
|
||||
p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32)
|
||||
jc = p.get("pred_joint_coords")
|
||||
if jc is not None:
|
||||
jc = np.asarray(jc, dtype=np.float32).copy()
|
||||
if joints_y_up:
|
||||
jc[..., 1] *= -1.0
|
||||
jc[..., 2] *= -1.0
|
||||
jc = (jc + cam_t - eye) @ R.T
|
||||
if joints_y_up:
|
||||
jc[..., 1] *= -1.0
|
||||
jc[..., 2] *= -1.0
|
||||
p["pred_joint_coords"] = jc.astype(np.float32)
|
||||
p["pred_cam_t"] = np.zeros(3, dtype=np.float32)
|
||||
p["focal_length"] = np.array(new_focal, dtype=np.float32)
|
||||
scaled.append(p)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user