mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-24 00:39:30 +08:00
235 lines
9.3 KiB
Python
235 lines
9.3 KiB
Python
"""2D OpenPose-style skeleton rendering for SAM 3D Body pose_data.
|
|
|
|
Body / hand drawing is delegated to `KeypointDraw.draw_wholebody_keypoints`
|
|
(shared with SDPose). SAM3D-specific: MHR70 -> DWPose-134 keypoint packing,
|
|
plus optional rig-projected face landmarks when `pred_face_keypoints_2d`
|
|
isn't present (and arbitrary-count face dots, since sapiens-238 doesn't fit
|
|
the DWPose face slot).
|
|
|
|
Output: (H, W, 3) fp32 torch.Tensor in [0, 1].
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any, Dict, Optional, Tuple
|
|
|
|
import numpy as np
|
|
import torch
|
|
from PIL import Image
|
|
|
|
from comfy_extras.pose.keypoint_draw import KeypointDraw
|
|
|
|
from .glb_shared import (
|
|
OPENPOSE18_TO_MHR70,
|
|
OPENPOSE_HAND21_TO_MHR70_L,
|
|
OPENPOSE_HAND21_TO_MHR70_R,
|
|
OPENPOSE_HAND_COLORS_21,
|
|
select_face_landmark_vert_ids,
|
|
)
|
|
|
|
|
|
_KD = KeypointDraw()
|
|
# OpenPose hand palette as a (21, 3) int array (0..255) for KeypointDraw.
|
|
_HAND_DOT_PALETTE_OPENPOSE = (OPENPOSE_HAND_COLORS_21 * 255.0).astype(int)
|
|
|
|
|
|
def _project_face_landmarks_2d(
|
|
person: Dict[str, Any], face_vert_ids: np.ndarray, H: int, W: int,
|
|
) -> Optional[np.ndarray]:
|
|
"""Project `pred_vertices[face_vert_ids]` to 2D using each person's
|
|
pred_cam_t + focal_length. Same projection used by `_replay_mhr_with_overrides`."""
|
|
verts = person.get("pred_vertices")
|
|
cam_t = person.get("pred_cam_t")
|
|
focal = person.get("focal_length")
|
|
if verts is None or cam_t is None or focal is None:
|
|
return None
|
|
verts = np.asarray(verts, dtype=np.float32)
|
|
cam_t = np.asarray(cam_t, dtype=np.float32).reshape(3)
|
|
f = float(np.asarray(focal, dtype=np.float32).reshape(-1)[0])
|
|
pts3 = verts[face_vert_ids] + cam_t[None, :]
|
|
z = np.maximum(pts3[:, 2:3], 1e-6)
|
|
xy = pts3[:, :2] * f
|
|
xy = xy + np.array([W * 0.5, H * 0.5], dtype=np.float32)[None, :] * z
|
|
return (xy / z).astype(np.float32)
|
|
|
|
|
|
def _pack_dwpose_134(
|
|
person: Dict[str, Any], *, include_body: bool, include_hands: bool,
|
|
) -> Tuple[np.ndarray, np.ndarray]:
|
|
"""Pack a SAM3D person dict into (kp, scores): (134, 2) DWPose-layout
|
|
coords + (134,) confidence. Face slot (24-91) is left zeroed; face dots
|
|
are drawn separately so SAM3D's 238-sapiens / rig-fallback counts work.
|
|
Non-finite or out-of-band entries get score=0 and are filtered downstream."""
|
|
kp = np.zeros((134, 2), dtype=np.float32)
|
|
scores = np.zeros(134, dtype=np.float32)
|
|
|
|
kp2d_full = person.get("pred_keypoints_2d")
|
|
if kp2d_full is None:
|
|
return kp, scores
|
|
kp2d = np.asarray(kp2d_full, dtype=np.float32)
|
|
if kp2d.ndim != 2 or kp2d.shape[1] != 2 or kp2d.shape[0] < 70:
|
|
return kp, scores
|
|
|
|
if include_body:
|
|
body_xy = kp2d[OPENPOSE18_TO_MHR70]
|
|
finite = np.isfinite(body_xy).all(axis=1)
|
|
kp[:18][finite] = body_xy[finite]
|
|
scores[:18][finite] = 1.0
|
|
|
|
if include_hands:
|
|
for slot_start, mhr_idx in ((92, OPENPOSE_HAND21_TO_MHR70_R),
|
|
(113, OPENPOSE_HAND21_TO_MHR70_L)):
|
|
hand_xy = kp2d[mhr_idx]
|
|
finite = np.isfinite(hand_xy).all(axis=1)
|
|
kp[slot_start:slot_start + 21][finite] = hand_xy[finite]
|
|
scores[slot_start:slot_start + 21][finite] = 1.0
|
|
|
|
return kp, scores
|
|
|
|
|
|
def _draw_face_dots(
|
|
canvas: np.ndarray, face_xy: np.ndarray, marker_radius_px: int,
|
|
) -> None:
|
|
"""White face dots, variable count (238 sapiens / ~30 rig-projected)."""
|
|
H, W = canvas.shape[:2]
|
|
pad = int(marker_radius_px)
|
|
white = (255, 255, 255)
|
|
for i in range(face_xy.shape[0]):
|
|
x_, y_ = float(face_xy[i, 0]), float(face_xy[i, 1])
|
|
if not (np.isfinite(x_) and np.isfinite(y_)):
|
|
continue
|
|
x, y = int(round(x_)), int(round(y_))
|
|
if x + pad < 0 or x - pad >= W or y + pad < 0 or y - pad >= H:
|
|
continue
|
|
_KD.draw.circle(canvas, (x, y), int(marker_radius_px), white, thickness=-1)
|
|
|
|
|
|
def render_pose_data_openpose(
|
|
pose_data: Dict[str, Any],
|
|
*,
|
|
frame_idx: int,
|
|
W: int,
|
|
H: int,
|
|
background: Optional[torch.Tensor] = None,
|
|
composite: str = "over",
|
|
marker_radius_px: int = 4,
|
|
stick_width_px: int = 4,
|
|
limb_alpha: float = 0.6,
|
|
include_body: bool = True,
|
|
include_hands: bool = False,
|
|
face_style: str = "disabled",
|
|
hand_color_style: str = "dwpose",
|
|
hand_marker_radius_px: int = 0,
|
|
hand_stick_width_px: int = 0,
|
|
face_marker_radius_px: int = 3,
|
|
person_brightness_falloff: float = 0.0,
|
|
) -> torch.Tensor:
|
|
"""Render a 2D OpenPose-style skeleton onto an (H, W, 3) canvas.
|
|
|
|
`composite='over'` paints over `background` (else black canvas).
|
|
`hand_marker_radius_px` / `hand_stick_width_px`: 0 = auto = 0.7x / 0.5x
|
|
of the body sizes.
|
|
`face_style`: 'disabled' = no face dots, 'full' = all face landmarks
|
|
(prefers sapiens-238 if present, else rig-fallback ~30), 'eyes_mouth' =
|
|
rig-fallback subset (~12 dots: eyes + mouth only). The subset only has a
|
|
documented layout for the rig fallback so eyes_mouth always uses it,
|
|
regardless of whether sapiens-238 is available.
|
|
`person_brightness_falloff` mixes each person's drawn pixels toward white
|
|
by `1 - falloff^k` (track 0 stays vivid). Applied post-draw so per-limb
|
|
alpha blending against the existing canvas remains correct.
|
|
"""
|
|
persons = pose_data["frames"][frame_idx]
|
|
|
|
if composite == "over" and background is not None:
|
|
bg = background.cpu().numpy()
|
|
canvas = (np.clip(bg, 0.0, 1.0) * 255.0).astype(np.uint8)
|
|
if canvas.shape[:2] != (H, W):
|
|
canvas = np.array(Image.fromarray(canvas).resize((W, H), Image.LANCZOS))
|
|
else:
|
|
canvas = np.zeros((H, W, 3), dtype=np.uint8)
|
|
# In-place draw needs a contiguous writable buffer.
|
|
canvas = np.ascontiguousarray(canvas)
|
|
|
|
if int(hand_marker_radius_px) <= 0:
|
|
hand_marker_radius_px = max(1, int(round(marker_radius_px * 0.7)))
|
|
if int(hand_stick_width_px) <= 0:
|
|
hand_stick_width_px = max(1, int(round(stick_width_px * 0.5)))
|
|
|
|
# Eyes+mouth indices into the rig fallback (FACE_LANDMARK_TARGETS): 6..13
|
|
# = both eyes, 19..22 = outer-lip ring. Brows/nose/chin/jaw are dropped.
|
|
_EYES_MOUTH_IDX = np.array([6, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22], dtype=np.int64)
|
|
|
|
include_face = face_style != "disabled"
|
|
use_rig_only = face_style == "eyes_mouth"
|
|
|
|
# Real 238 sapiens face KPs take priority for 'full'; 'eyes_mouth' always
|
|
# falls through to the rig path since sapiens has no documented subset.
|
|
face_vert_ids: Optional[np.ndarray] = None
|
|
if include_face:
|
|
any_real = (not use_rig_only) and any(
|
|
p.get("pred_face_keypoints_2d") is not None for p in persons
|
|
)
|
|
if not any_real:
|
|
cc = pose_data.get("canonical_colors") or {}
|
|
positions = cc.get("positions")
|
|
if positions is not None:
|
|
try:
|
|
face_vert_ids = select_face_landmark_vert_ids(
|
|
np.asarray(positions), face_mask=cc.get("face_mask"),
|
|
)
|
|
if use_rig_only:
|
|
face_vert_ids = face_vert_ids[_EYES_MOUTH_IDX]
|
|
except Exception as e:
|
|
logging.warning(f"[SAM3DBody] face landmarks disabled - {e}")
|
|
face_vert_ids = None
|
|
|
|
hand_dot_color = (
|
|
_HAND_DOT_PALETTE_OPENPOSE if hand_color_style == "openpose"
|
|
else (0, 0, 255)
|
|
)
|
|
|
|
falloff = max(0.0, min(1.0, float(person_brightness_falloff)))
|
|
|
|
for k, person in enumerate(persons):
|
|
pastel = 0.0 if k == 0 else (1.0 - falloff ** k)
|
|
# Snapshot before this person's strokes so we can identify the pixels
|
|
# they touched and blend just those toward white. Drawing happens
|
|
# against the live canvas first so limb_alpha blends correctly.
|
|
pre = canvas.copy() if pastel > 0 else None
|
|
|
|
kp134, scores134 = _pack_dwpose_134(
|
|
person, include_body=include_body, include_hands=include_hands,
|
|
)
|
|
_KD.draw_wholebody_keypoints(
|
|
canvas, kp134, scores=scores134, threshold=0.5,
|
|
draw_body=include_body, draw_feet=False,
|
|
draw_face=False, # SAM3D draws face dots separately (variable count)
|
|
draw_hands=include_hands,
|
|
stick_width=stick_width_px,
|
|
marker_radius=marker_radius_px,
|
|
hand_stick_width=hand_stick_width_px,
|
|
hand_marker_radius=hand_marker_radius_px,
|
|
limb_alpha=limb_alpha,
|
|
hand_dot_color=hand_dot_color,
|
|
)
|
|
|
|
if include_face:
|
|
face_xy = None
|
|
real_face = person.get("pred_face_keypoints_2d")
|
|
if real_face is not None:
|
|
arr = np.asarray(real_face, dtype=np.float32)
|
|
if arr.ndim == 2 and arr.shape[1] == 2:
|
|
face_xy = arr
|
|
elif face_vert_ids is not None:
|
|
face_xy = _project_face_landmarks_2d(person, face_vert_ids, H, W)
|
|
if face_xy is not None:
|
|
_draw_face_dots(canvas, face_xy, face_marker_radius_px)
|
|
|
|
if pre is not None:
|
|
changed = (canvas != pre).any(axis=-1)
|
|
if changed.any():
|
|
touched = canvas[changed].astype(np.float32)
|
|
blended = touched * (1.0 - pastel) + 255.0 * pastel
|
|
canvas[changed] = np.clip(blended, 0.0, 255.0).astype(np.uint8)
|
|
|
|
return torch.from_numpy(canvas.astype(np.float32) / 255.0)
|