mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-23 16:29:25 +08:00
Camera info to render nodes, fixes and tweaks
This commit is contained in:
parent
828016a274
commit
8acb162c31
@ -17,6 +17,7 @@ import folder_paths
|
||||
from comfy.ldm.sam3d_body.model.model import SAM3DBody
|
||||
from comfy.ldm.sam3d_body.model.dinov3 import apply_dinov3_qkv_bias_mask
|
||||
from comfy_extras.sam3d_body.utils import (
|
||||
apply_camera_override,
|
||||
cam_int_from_fov,
|
||||
cam_int_from_moge,
|
||||
inputs_from_sam3_track,
|
||||
@ -99,6 +100,32 @@ class SAM3DBody_Loader(io.ComfyNode):
|
||||
|
||||
# Predict
|
||||
|
||||
def _per_frame_bboxes_from_detections(bboxes, B: int):
    """Convert a detector BoundingBox payload into one xyxy tensor per frame.

    Accepted payload shapes:
      * a single dict            -> one detection, reused for every frame
      * a flat list of dicts     -> the same detections reused for every frame
      * a list of per-frame lists of dicts

    Each detection dict is read through its "x", "y", "width" and "height"
    keys and turned into an ``[x1, y1, x2, y2]`` row.

    Returns ``None`` when a non-dict payload is empty/falsy; otherwise a list
    of exactly ``B`` float32 tensors of shape ``(num_boxes, 4)``. Frames with
    no detections get a ``(0, 4)`` tensor. A payload holding a single frame is
    repeated ``B`` times; shorter payloads are padded with empty frames and
    longer ones are cut to the first ``B`` frames.
    """
    # Bring every accepted payload shape to list[list[dict]].
    if isinstance(bboxes, dict):
        frames = [[bboxes]]
    else:
        if not bboxes:
            return None
        # A flat list of dicts means "same detections every frame".
        frames = [bboxes] if isinstance(bboxes[0], dict) else list(bboxes)

    # Broadcast a lone frame, then pad with empty frames / truncate to B.
    if len(frames) == 1:
        frames = frames * B
    frames = (frames + [[]] * B)[:B]

    def _to_xyxy(detections):
        # Empty (or falsy) frame -> zero-row tensor so callers can still index it.
        if not detections:
            return torch.zeros((0, 4), dtype=torch.float32)
        rows = [
            [det["x"], det["y"], det["x"] + det["width"], det["y"] + det["height"]]
            for det in detections
        ]
        return torch.tensor(rows, dtype=torch.float32)

    return [_to_xyxy(detections) for detections in frames]
|
||||
|
||||
|
||||
class SAM3DBody_Predict(io.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
@ -113,6 +140,14 @@ class SAM3DBody_Predict(io.ComfyNode):
|
||||
"sam3_track_data", optional=True,
|
||||
tooltip=("Output of SAM3 Video Track, required for multi-person detection"),
|
||||
),
|
||||
io.BoundingBox.Input(
|
||||
"bboxes", optional=True, force_input=True,
|
||||
tooltip=(
|
||||
"Per-frame person boxes (e.g. RT-DETR Detect with class_name='person'). "
|
||||
"Used when no SAM3 track is wired — gives the top-down model a tight, "
|
||||
"person-centered crop. Multi-person supported (one box = one person)."
|
||||
),
|
||||
),
|
||||
io.Boolean.Input(
|
||||
"run_hand_refinement", default=True,
|
||||
tooltip="Improves hand pose at the cost of extra inference time and memory use"),
|
||||
@ -146,19 +181,22 @@ class SAM3DBody_Predict(io.ComfyNode):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, sam3d_body_model, image, sam3_track_data=None, run_hand_refinement=True, fov_degrees=0.0, moge_geometry=None, chunk_size=144) -> io.NodeOutput:
|
||||
def execute(cls, sam3d_body_model, image, sam3_track_data=None, bboxes=None, run_hand_refinement=True, fov_degrees=0.0, moge_geometry=None, chunk_size=64) -> io.NodeOutput:
|
||||
comfy.model_management.load_model_gpu(sam3d_body_model)
|
||||
inner: SAM3DBody = sam3d_body_model.model
|
||||
|
||||
B, H, W, _ = image.shape
|
||||
image_size = getattr(inner, "_sam3d_image_size", (512, 512))
|
||||
|
||||
# Precedence: SAM3 track (masks + boxes) > detector boxes > full-frame fallback.
|
||||
per_frame_bboxes, per_frame_masks = (None, None)
|
||||
if sam3_track_data is not None:
|
||||
per_frame_bboxes, per_frame_masks = inputs_from_sam3_track(sam3_track_data, B, H, W)
|
||||
if per_frame_bboxes is None and bboxes:
|
||||
per_frame_bboxes = _per_frame_bboxes_from_detections(bboxes, B)
|
||||
per_frame_masks = None
|
||||
if per_frame_bboxes is None:
|
||||
# No track wired (or empty / frame count mismatch) — single-person
|
||||
# full-frame fallback. Multi-person scenes need SAM3 Video Track.
|
||||
# No track or detector boxes — single-person full-frame fallback.
|
||||
full_frame_bbox = torch.tensor([[0.0, 0.0, float(W), float(H)]], dtype=torch.float32)
|
||||
per_frame_bboxes = [full_frame_bbox.clone() for _ in range(B)]
|
||||
per_frame_masks = None
|
||||
@ -711,6 +749,26 @@ def _render_capsules_mode_inputs():
|
||||
]
|
||||
|
||||
|
||||
def _render_openpose3d_mode_inputs():
    """Return the sub-inputs attached to the 'openpose_3d' render-style option.

    Three widgets, in display order: the limb capsule radius (meters), a
    toggle for drawing hand keypoints, and the per-person palette falloff.
    """
    limb_radius = io.Float.Input(
        "radius_m", default=0.015, min=0.004, max=0.1, step=0.001,
        tooltip="Limb capsule radius in meters (thin = stick-like).",
    )
    hands_toggle = io.Boolean.Input(
        "include_hands", default=True,
        tooltip="Draw 21+21 hand keypoints as 3D capsules.",
    )
    falloff_tooltip = (
        "Per-person desaturation: track k blends toward white by "
        "1 - falloff^k. Track 0 stays vivid; 1.0 disables falloff."
    )
    palette_falloff = io.Float.Input(
        "person_palette_falloff", default=0.6, min=0.1, max=1.0, step=0.05,
        tooltip=falloff_tooltip,
    )
    return [limb_radius, hands_toggle, palette_falloff]
|
||||
|
||||
|
||||
def _render_openpose_mode_inputs():
|
||||
return [
|
||||
io.Int.Input(
|
||||
@ -755,15 +813,8 @@ def _render_openpose_mode_inputs():
|
||||
|
||||
|
||||
def _scale_pose_data(mhr_pose_data: Dict[str, Any], new_H: int, new_W: int) -> Dict[str, Any]:
|
||||
"""Rescale per-person camera intrinsics + 2D coords to a new canvas size.
|
||||
Pose data records focal_length in pixels of the original image; without
|
||||
scaling, the FOV would change and subjects would be cropped/zoomed.
|
||||
|
||||
When the new aspect differs from the original, the body (3D-projected
|
||||
through focal_length on a centered principal point) lands in a
|
||||
letterboxed region of the new canvas. 2D-prestored coords must follow
|
||||
the same uniform scale + center offset so face/hand overlays align with
|
||||
the body — per-axis stretching would split them apart."""
|
||||
# 2D coords must match the body's letterbox transform (uniform scale +
|
||||
# center offset), else face/hand overlays drift off the body.
|
||||
old_H, old_W = mhr_pose_data["image_size"]
|
||||
if new_H == old_H and new_W == old_W:
|
||||
return mhr_pose_data
|
||||
@ -831,20 +882,38 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
"other is derived preserving the original aspect."
|
||||
),
|
||||
),
|
||||
io.Load3DCamera.Input(
|
||||
"camera_info", optional=True,
|
||||
tooltip=(
|
||||
"Free 6DOF camera override. When wired, the pose is re-projected through this camera "
|
||||
"(position/target/zoom) instead of the predicted one. "
|
||||
),
|
||||
),
|
||||
io.Float.Input(
|
||||
"camera_fov", default=0.0, min=0.0, max=170.0, step=0.5, advanced=True,
|
||||
tooltip=(
|
||||
"Vertical FOV for the camera_info override. 0 = keep the SAM3D "
|
||||
"predicted camera's FOV (only the viewpoint changes). Any non-zero "
|
||||
"value overrides the lens. Ignored when camera_info is unwired."
|
||||
),
|
||||
),
|
||||
io.DynamicCombo.Input(
|
||||
"render_style",
|
||||
options=[
|
||||
io.DynamicCombo.Option("mesh", _render_mesh_mode_inputs()),
|
||||
io.DynamicCombo.Option("silhouette", []),
|
||||
io.DynamicCombo.Option("openpose", _render_openpose_mode_inputs()),
|
||||
io.DynamicCombo.Option("openpose_2d", _render_openpose_mode_inputs()),
|
||||
io.DynamicCombo.Option("openpose_3d", _render_openpose3d_mode_inputs()),
|
||||
io.DynamicCombo.Option("scail", _render_capsules_mode_inputs()),
|
||||
],
|
||||
tooltip=(
|
||||
"'mesh' = 3D MHR mesh rasterized through the camera. "
|
||||
"'silhouette' = binary mask of the mesh (white-on-black, "
|
||||
"background ignored). 'openpose' = flat 2D skeleton "
|
||||
"from pred_keypoints_2d (DWPose look). 'scail' = SCAIL "
|
||||
"3D capsules via torch SDF ray-march (proper occlusion / depth)."
|
||||
"background ignored). 'openpose_2d' = flat 2D skeleton "
|
||||
"from pred_keypoints_2d (DWPose look, ControlNet-ready). "
|
||||
"'openpose_3d' = same skeleton as flat-shaded 3D capsules "
|
||||
"(camera-aware, proper depth). 'scail' = SCAIL 3D capsules "
|
||||
"via torch SDF ray-march (proper occlusion / depth)."
|
||||
),
|
||||
),
|
||||
],
|
||||
@ -853,7 +922,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
|
||||
|
||||
@classmethod
|
||||
def execute(cls, mhr_pose_data, background=None, width=0, height=0, render_style=None) -> io.NodeOutput:
|
||||
def execute(cls, mhr_pose_data, background=None, width=0, height=0, camera_info=None, camera_fov=0.0, render_style=None) -> io.NodeOutput:
|
||||
render_style = render_style or {"render_style": "mesh"}
|
||||
mode_key = render_style.get("render_style", "mesh")
|
||||
|
||||
@ -869,10 +938,11 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
new_H = max(1, round(native_H * new_W / native_W))
|
||||
mhr_pose_data = _scale_pose_data(mhr_pose_data, new_H, new_W)
|
||||
H, W = new_H, new_W
|
||||
# Marker/stick px constants are authored for native resolution —
|
||||
# scale them so the openpose overlay reads at the same relative size.
|
||||
px_scale = min(new_W / native_W, new_H / native_H)
|
||||
|
||||
if camera_info is not None:
|
||||
mhr_pose_data = apply_camera_override(mhr_pose_data, camera_info, H, W, fov_deg=float(camera_fov))
|
||||
|
||||
B = len(mhr_pose_data["frames"])
|
||||
if B == 0:
|
||||
return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32))
|
||||
@ -880,6 +950,8 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
out_device = comfy.model_management.intermediate_device()
|
||||
bg_t = None if background is None else background.to(device=out_device, dtype=torch.float32)
|
||||
|
||||
if bg_t is not None and tuple(bg_t.shape[1:3]) != (H, W): # Match the background to the render resolution
|
||||
bg_t = comfy.utils.common_upscale(bg_t.movedim(-1, 1), W, H, "bilinear", "disabled").movedim(1, -1)
|
||||
|
||||
if mode_key == "silhouette":
|
||||
composite = "silhouette"
|
||||
@ -888,7 +960,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
else:
|
||||
composite = "mesh_only"
|
||||
|
||||
if mode_key == "openpose":
|
||||
if mode_key == "openpose_2d":
|
||||
marker_radius_px = max(1, int(round(render_style.get("marker_radius_px", 4) * px_scale)))
|
||||
stick_width_px = max(1, int(round(render_style.get("stick_width_px", 4) * px_scale)))
|
||||
limb_alpha = float(render_style.get("limb_alpha", 0.6))
|
||||
@ -897,6 +969,10 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
include_hands = hand_style != "disabled"
|
||||
hand_color_style = hand_style if include_hands else "dwpose"
|
||||
person_palette_falloff = float(render_style.get("person_palette_falloff", 0.6))
|
||||
elif mode_key == "openpose_3d":
|
||||
op3d_radius_m = float(render_style.get("radius_m", 0.015))
|
||||
op3d_include_hands = bool(render_style.get("include_hands", True))
|
||||
person_palette_falloff = float(render_style.get("person_palette_falloff", 0.6))
|
||||
elif mode_key == "scail":
|
||||
cap_radius_m = float(render_style.get("radius_m", 0.030))
|
||||
cap_hand_style = str(render_style.get("hand_style", "disabled"))
|
||||
@ -931,7 +1007,8 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
frames_out = []
|
||||
pbar = comfy.utils.ProgressBar(B)
|
||||
desc = (
|
||||
"SAM3D openpose-2D render" if mode_key == "openpose"
|
||||
"SAM3D openpose-2D render" if mode_key == "openpose_2d"
|
||||
else "SAM3D openpose-3D render" if mode_key == "openpose_3d"
|
||||
else "SAM3D SCAIL-3D render" if mode_key == "scail"
|
||||
else "SAM3D silhouette" if mode_key == "silhouette"
|
||||
else "SAM3D render"
|
||||
@ -940,7 +1017,7 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
bg_f = None
|
||||
if bg_t is not None:
|
||||
bg_f = bg_t[min(f, bg_t.shape[0] - 1)]
|
||||
if mode_key == "openpose":
|
||||
if mode_key == "openpose_2d":
|
||||
img = render_pose_data_openpose(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
background=bg_f,
|
||||
@ -953,6 +1030,17 @@ class SAM3DBody_Render(io.ComfyNode):
|
||||
hand_color_style=hand_color_style,
|
||||
person_brightness_falloff=person_palette_falloff,
|
||||
)
|
||||
elif mode_key == "openpose_3d":
|
||||
img = render_pose_data_capsules(
|
||||
mhr_pose_data, frame_idx=f, W=W, H=H,
|
||||
background=bg_f,
|
||||
composite=composite,
|
||||
radius_m=op3d_radius_m,
|
||||
include_hands=op3d_include_hands,
|
||||
palette="openpose",
|
||||
flat_shade=True,
|
||||
person_brightness_falloff=person_palette_falloff,
|
||||
)
|
||||
elif mode_key == "scail":
|
||||
# SCAIL renders body as 3D capsules + 2D openpose hands on top
|
||||
img = render_pose_data_capsules(
|
||||
|
||||
@ -449,7 +449,7 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
IO.DynamicCombo.Option("octahedrons", [
|
||||
IO.Float.Input(
|
||||
"bone_vis_radius_m",
|
||||
default=0.02, min=0.005, max=0.5, step=0.005,
|
||||
default=0.02, min=0.005, max=0.5, step=0.005, advanced=True,
|
||||
tooltip="Radius in m (sphere radius / octahedron half-width).",
|
||||
),
|
||||
IO.Combo.Input(
|
||||
@ -527,7 +527,7 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
IO.DynamicCombo.Option("octahedrons", [
|
||||
IO.Float.Input(
|
||||
"bone_vis_radius_m",
|
||||
default=0.02, min=0.005, max=0.5, step=0.005,
|
||||
default=0.02, min=0.005, max=0.5, step=0.005, advanced=True,
|
||||
tooltip="Radius in m (sphere radius / octahedron half-width).",
|
||||
),
|
||||
IO.Combo.Input(
|
||||
@ -557,12 +557,20 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
),
|
||||
]),
|
||||
IO.DynamicCombo.Option("openpose", [
|
||||
IO.Int.Input(
|
||||
"bone_smooth_window",
|
||||
default=0, min=0, max=51, step=2,
|
||||
tooltip=(
|
||||
"Gaussian window on keypoint tracks. 0 = off. "
|
||||
"7-15 calms jitter where upstream Smooth misses spikes."
|
||||
),
|
||||
),
|
||||
IO.Float.Input(
|
||||
"marker_radius_m", default=0.010, min=0.005, max=0.1, step=0.001,
|
||||
"marker_radius_m", default=0.010, min=0.005, max=0.1, step=0.001, advanced=True,
|
||||
tooltip="Sphere radius in m.",
|
||||
),
|
||||
IO.Float.Input(
|
||||
"stick_radius_m", default=0.008, min=0.002, max=0.05, step=0.001,
|
||||
"stick_radius_m", default=0.008, min=0.002, max=0.05, step=0.001, advanced=True,
|
||||
tooltip="Limb half-width in m. Auto-clamped to bone_length x 0.1.",
|
||||
),
|
||||
IO.Boolean.Input(
|
||||
@ -573,31 +581,39 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
),
|
||||
),
|
||||
IO.Float.Input(
|
||||
"hand_marker_radius_m", default=0.005, min=0.001, max=0.1, step=0.001,
|
||||
"hand_marker_radius_m", default=0.005, min=0.001, max=0.1, step=0.001, advanced=True,
|
||||
tooltip="Hand sphere radius in m.",
|
||||
),
|
||||
IO.Float.Input(
|
||||
"hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001,
|
||||
"hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001, advanced=True,
|
||||
tooltip="Hand limb half-width in m.",
|
||||
),
|
||||
IO.Combo.Input(
|
||||
"face_source",
|
||||
options=["off", "rig"],
|
||||
default="off",
|
||||
"face_style",
|
||||
options=["disabled", "full", "eyes_mouth"],
|
||||
default="disabled",
|
||||
tooltip=(
|
||||
"'rig' adds ~30 face-contour landmarks sampled from pred_vertices "
|
||||
"at fixed head-mesh vertex IDs (brow/eyes/nose/mouth/jaw); needs "
|
||||
"canonical_colors on pose_data."
|
||||
"Face-contour landmarks sampled from pred_vertices at fixed "
|
||||
"head-mesh vertex IDs (needs canonical_colors on pose_data). "
|
||||
"'full' = all ~30 points; 'eyes_mouth' = eyes + outer lips only."
|
||||
),
|
||||
),
|
||||
IO.Float.Input(
|
||||
"face_marker_radius_m", default=0.0, min=0.0, max=0.05, step=0.0005,
|
||||
"face_marker_radius_m", default=0.0, min=0.0, max=0.05, step=0.0005, advanced=True,
|
||||
tooltip="Face dot radius. 0 = auto = 0.3 x marker_radius_m.",
|
||||
),
|
||||
]),
|
||||
IO.DynamicCombo.Option("scail", [
|
||||
IO.Int.Input(
|
||||
"bone_smooth_window",
|
||||
default=0, min=0, max=51, step=2,
|
||||
tooltip=(
|
||||
"Gaussian window on keypoint tracks. 0 = off. "
|
||||
"7-15 calms jitter where upstream Smooth misses spikes."
|
||||
),
|
||||
),
|
||||
IO.Float.Input(
|
||||
"stick_radius_m", default=0.022, min=0.002, max=0.1, step=0.001,
|
||||
"stick_radius_m", default=0.022, min=0.002, max=0.1, step=0.001, advanced=True,
|
||||
tooltip=(
|
||||
"Cylinder radius in m. Bones are open cylinders at constant "
|
||||
"radius; joint spheres (auto-sized to match) cap the open ends. "
|
||||
@ -605,11 +621,11 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
),
|
||||
),
|
||||
IO.Float.Input(
|
||||
"marker_radius_m", default=0.0, min=0.0, max=0.1, step=0.001,
|
||||
"marker_radius_m", default=0.0, min=0.0, max=0.1, step=0.001, advanced=True,
|
||||
tooltip="Joint sphere radius. 0 = auto = stick_radius_m (flush cap).",
|
||||
),
|
||||
IO.Float.Input(
|
||||
"material_roughness", default=0.3, min=0.0, max=1.0, step=0.05,
|
||||
"material_roughness", default=0.3, min=0.0, max=1.0, step=0.05, advanced=True,
|
||||
tooltip="PBR roughness. SCAIL ref = 0.3. 1 = matte; 0 = chrome.",
|
||||
),
|
||||
IO.Boolean.Input(
|
||||
@ -617,13 +633,23 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
tooltip="Append 21+21 hand keypoints + capsule sticks per track.",
|
||||
),
|
||||
IO.Float.Input(
|
||||
"hand_marker_radius_m", default=0.005, min=0.001, max=0.05, step=0.001,
|
||||
"hand_marker_radius_m", default=0.005, min=0.001, max=0.05, step=0.001, advanced=True,
|
||||
tooltip="Hand sphere radius in m.",
|
||||
),
|
||||
IO.Float.Input(
|
||||
"hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001,
|
||||
"hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001, advanced=True,
|
||||
tooltip="Hand cylinder radius in m.",
|
||||
),
|
||||
IO.Combo.Input(
|
||||
"face_style",
|
||||
options=["disabled", "full", "eyes_mouth"],
|
||||
default="disabled",
|
||||
tooltip=(
|
||||
"Face-contour landmarks sampled from pred_vertices (needs "
|
||||
"canonical_colors on pose_data). 'full' = all ~30 points; "
|
||||
"'eyes_mouth' = eyes + outer lips only."
|
||||
),
|
||||
),
|
||||
]),
|
||||
],
|
||||
tooltip=(
|
||||
@ -710,10 +736,11 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
include_hands=bool(mesh_style.get("include_hands", False)),
|
||||
hand_marker_radius_m=float(mesh_style.get("hand_marker_radius_m", 0.005)),
|
||||
hand_stick_radius_m=float(mesh_style.get("hand_stick_radius_m", 0.003)),
|
||||
face_source=str(mesh_style.get("face_source", "off")),
|
||||
face_style=str(mesh_style.get("face_style", "disabled")),
|
||||
face_marker_radius_m=float(mesh_style.get("face_marker_radius_m", 0.0)),
|
||||
palette="openpose",
|
||||
shape="ellipsoid",
|
||||
bone_smooth_window=int(mesh_style.get("bone_smooth_window", 0)),
|
||||
)
|
||||
elif mode_key == "scail":
|
||||
# SCAIL rig: open cylinders capped flush by joint spheres (sphere
|
||||
@ -732,7 +759,7 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
include_hands=bool(mesh_style.get("include_hands", False)),
|
||||
hand_marker_radius_m=float(mesh_style.get("hand_marker_radius_m", 0.005)),
|
||||
hand_stick_radius_m=float(mesh_style.get("hand_stick_radius_m", 0.003)),
|
||||
face_source="off",
|
||||
face_style=str(mesh_style.get("face_style", "disabled")),
|
||||
palette="scail",
|
||||
shape="capsule",
|
||||
smooth_shade=True,
|
||||
@ -740,6 +767,7 @@ class BuildPoseGLB(IO.ComfyNode):
|
||||
# inside of the open cylinders shades sensibly at grazing angles.
|
||||
material_roughness=float(mesh_style.get("material_roughness", 0.3)),
|
||||
material_double_sided=True,
|
||||
bone_smooth_window=int(mesh_style.get("bone_smooth_window", 0)),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"BuildPoseGLB: unknown mesh_style {mode_key!r}")
|
||||
|
||||
@ -41,10 +41,11 @@ def _build_specs_from_pose(
|
||||
include_hands: bool,
|
||||
palette: str,
|
||||
person_brightness_falloff: float = 0.0,
|
||||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""Flatten body + optional hand limbs for one frame into
|
||||
(starts, ends, colors_rgba) in camera coords (Y-down, +Z forward).
|
||||
Drops endpoints that are non-finite or behind the camera.
|
||||
(starts, ends, colors_rgba, is_hand) in camera coords (Y-down, +Z forward).
|
||||
Drops endpoints that are non-finite or behind the camera. `is_hand` flags
|
||||
the hand limbs so the renderer can draw them thinner.
|
||||
|
||||
`person_brightness_falloff` mixes each per-person limb color toward white
|
||||
by `1 - falloff^k` for track index `k` (track 0 stays vivid). Matches the
|
||||
@ -52,6 +53,7 @@ def _build_specs_from_pose(
|
||||
starts: List[np.ndarray] = []
|
||||
ends: List[np.ndarray] = []
|
||||
colors: List[np.ndarray] = []
|
||||
is_hand: List[bool] = []
|
||||
|
||||
body_limb_colors = _limb_palette_rgb01(palette)
|
||||
hand_limb_colors = OPENPOSE_HAND_COLORS_21.astype(np.float32)
|
||||
@ -109,6 +111,7 @@ def _build_specs_from_pose(
|
||||
sb = sa + spine_dir * (sd_len * 0.3)
|
||||
starts.append(sa)
|
||||
ends.append(sb)
|
||||
is_hand.append(False)
|
||||
color_rgb = _tint(body_limb_colors[limb_i])
|
||||
colors.append(np.array([color_rgb[0], color_rgb[1], color_rgb[2], 1.0],
|
||||
dtype=np.float32))
|
||||
@ -125,6 +128,7 @@ def _build_specs_from_pose(
|
||||
continue
|
||||
starts.append(sa)
|
||||
ends.append(sb)
|
||||
is_hand.append(True)
|
||||
color_rgb = _tint(hand_limb_colors[(a + b) % len(hand_limb_colors)])
|
||||
colors.append(np.array([color_rgb[0], color_rgb[1], color_rgb[2], 1.0],
|
||||
dtype=np.float32))
|
||||
@ -132,10 +136,12 @@ def _build_specs_from_pose(
|
||||
if not starts:
|
||||
return (np.zeros((0, 3), dtype=np.float32),
|
||||
np.zeros((0, 3), dtype=np.float32),
|
||||
np.zeros((0, 4), dtype=np.float32))
|
||||
np.zeros((0, 4), dtype=np.float32),
|
||||
np.zeros((0,), dtype=bool))
|
||||
return (np.stack(starts).astype(np.float32),
|
||||
np.stack(ends).astype(np.float32),
|
||||
np.stack(colors).astype(np.float32))
|
||||
np.stack(colors).astype(np.float32),
|
||||
np.asarray(is_hand, dtype=bool))
|
||||
|
||||
|
||||
def _ray_capsule_t(
|
||||
@ -144,14 +150,14 @@ def _ray_capsule_t(
|
||||
ends: torch.Tensor, # (M, 3)
|
||||
ba_norm: torch.Tensor, # (M, 3) unit axis (A → B)
|
||||
ba_len: torch.Tensor, # (M,) segment length
|
||||
radius: float,
|
||||
radius: torch.Tensor, # (M,) per-capsule radius
|
||||
) -> torch.Tensor:
|
||||
"""Closed-form ray-capsule intersection. Returns (K, M) tensor of ray
|
||||
parameters t to the nearest valid hit per capsule, +inf where the ray
|
||||
misses. A capsule is the union of (cylinder body, hemisphere at A,
|
||||
hemisphere at B); each component is a quadratic root-find."""
|
||||
INF = float("inf")
|
||||
r_sq = float(radius) * float(radius)
|
||||
r_sq = radius * radius # (M,)
|
||||
|
||||
# Cached dot products.
|
||||
dn = ray_dirs @ ba_norm.transpose(0, 1) # (K, M) — d·n
|
||||
@ -199,9 +205,10 @@ def _render_capsules_torch(
|
||||
colors: torch.Tensor,
|
||||
H: int, W: int,
|
||||
fx: float, fy: float, cx: float, cy: float,
|
||||
radius: float,
|
||||
radius: torch.Tensor, # scalar or (M,) per-capsule radius
|
||||
background_rgb: Optional[torch.Tensor],
|
||||
device: torch.device,
|
||||
flat_shade: bool = False,
|
||||
) -> torch.Tensor:
|
||||
"""Analytic ray-capsule renderer for a union of capsules. Camera at
|
||||
origin looking down +Z; pixels in y-down screen coords."""
|
||||
@ -224,12 +231,16 @@ def _render_capsules_torch(
|
||||
flat_dirs = ray_dirs.view(-1, 3)
|
||||
N = flat_dirs.shape[0]
|
||||
|
||||
radius = torch.as_tensor(radius, device=device, dtype=torch.float32)
|
||||
if radius.ndim == 0:
|
||||
radius = radius.expand(M)
|
||||
|
||||
ba = ends - starts
|
||||
ba_len = torch.linalg.norm(ba, dim=1).clamp(min=1e-6)
|
||||
ba_norm = ba / ba_len.unsqueeze(1)
|
||||
|
||||
z_min = float(min(starts[:, 2].min().item(), ends[:, 2].min().item()))
|
||||
z_near = max(0.05, z_min - radius)
|
||||
z_near = max(0.05, z_min - float(radius.max().item()))
|
||||
|
||||
# Union of per-capsule screen-space bboxes. Pixels outside this mask
|
||||
# provably can't hit any capsule, so the analytic intersection only runs
|
||||
@ -298,6 +309,10 @@ def _render_capsules_torch(
|
||||
normals = normals / normals.norm(dim=-1, keepdim=True).clamp(min=1e-8)
|
||||
|
||||
col = colors[m_h, :3]
|
||||
if flat_shade:
|
||||
# Solid per-limb color (OpenPose look) — no lighting/depth modulation.
|
||||
out[hit_idx] = col
|
||||
return out.view(H, W, 3).clamp(0.0, 1.0)
|
||||
# SCAIL Blinn-Phong (render_torch.py:290-331). Headlight: light = +Z.
|
||||
diff = torch.clamp(-(normals[:, 2]), min=0.0)
|
||||
diffuse = 0.45 + 0.55 * diff
|
||||
@ -336,6 +351,8 @@ def render_pose_data_capsules(
|
||||
include_hands: bool = False,
|
||||
palette: str = "scail",
|
||||
person_brightness_falloff: float = 0.0,
|
||||
flat_shade: bool = False,
|
||||
hand_radius_scale: float = 0.4,
|
||||
device: Optional[torch.device] = None,
|
||||
) -> torch.Tensor:
|
||||
"""Render a frame's pose_data as 3D capsules projected through the per-
|
||||
@ -345,7 +362,8 @@ def render_pose_data_capsules(
|
||||
`composite='mesh_only'` always uses a black canvas.
|
||||
|
||||
`radius_m` is in METERS (matching `pred_keypoints_3d` / `pred_cam_t`).
|
||||
Camera fx/fy come from each person's `focal_length` (pixels); cx/cy = center.
|
||||
Hand limbs use `radius_m * hand_radius_scale` (their bones are far shorter
|
||||
than body limbs). Camera fx/fy come from each person's `focal_length`.
|
||||
"""
|
||||
persons = pose_data["frames"][frame_idx]
|
||||
if device is None:
|
||||
@ -361,7 +379,7 @@ def render_pose_data_capsules(
|
||||
break
|
||||
cx, cy = W * 0.5, H * 0.5
|
||||
|
||||
starts_np, ends_np, colors_np = _build_specs_from_pose(
|
||||
starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose(
|
||||
persons, include_hands=include_hands, palette=palette,
|
||||
person_brightness_falloff=person_brightness_falloff,
|
||||
)
|
||||
@ -384,11 +402,14 @@ def render_pose_data_capsules(
|
||||
starts_t = torch.from_numpy(starts_np).to(device=device, dtype=torch.float32)
|
||||
ends_t = torch.from_numpy(ends_np).to(device=device, dtype=torch.float32)
|
||||
colors_t = torch.from_numpy(colors_np).to(device=device, dtype=torch.float32)
|
||||
radii_np = np.where(is_hand_np, radius_m * hand_radius_scale, radius_m).astype(np.float32)
|
||||
radii_t = torch.from_numpy(radii_np).to(device=device, dtype=torch.float32)
|
||||
|
||||
return _render_capsules_torch(
|
||||
starts_t, ends_t, colors_t,
|
||||
H=H, W=W, fx=fx, fy=fy, cx=cx, cy=cy,
|
||||
radius=float(radius_m),
|
||||
radius=radii_t,
|
||||
background_rgb=bg_t,
|
||||
device=device,
|
||||
flat_shade=flat_shade,
|
||||
)
|
||||
|
||||
@ -37,6 +37,7 @@ from .glb_shared import (
|
||||
SCAIL_LIMB_COLORS_17,
|
||||
collect_tracks,
|
||||
flat_shade_mesh,
|
||||
gaussian_smooth_positions,
|
||||
make_lit_material,
|
||||
quat_sign_fix_per_joint,
|
||||
rotation_align,
|
||||
@ -364,11 +365,14 @@ def _build_openpose_spheres(
|
||||
bind_kp_m: np.ndarray, radius_m: float, kp_colors: np.ndarray,
|
||||
base_joint_idx: int = 0,
|
||||
smooth_shade: bool = False,
|
||||
joint_indices: Optional[np.ndarray] = None,
|
||||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""UV sphere per OpenPose keypoint, rigidly skinned to that keypoint's
|
||||
joint, vertex-colored from kp_colors. `base_joint_idx` is added to the
|
||||
emitted JOINTS_0 indices so callers can place this group at any offset
|
||||
in the shared skin (body=0, right hand=18, etc.).
|
||||
in the shared skin (body=0, right hand=18, etc.). `joint_indices` (when
|
||||
given) overrides that with explicit per-sphere joint indices, so callers
|
||||
can skip keypoints (e.g. SCAIL head dots).
|
||||
|
||||
`smooth_shade=True` keeps the indexed mesh and writes per-vertex
|
||||
normals via face-normal averaging — round shading on the spheres.
|
||||
@ -390,7 +394,7 @@ def _build_openpose_spheres(
|
||||
out_v[v_off:v_off + Nv] = sv * radius_m + bind_kp_m[j]
|
||||
out_n[v_off:v_off + Nv] = sv
|
||||
out_f[j * Nf:(j + 1) * Nf] = sf + v_off
|
||||
out_j[v_off:v_off + Nv, 0] = j + base_joint_idx
|
||||
out_j[v_off:v_off + Nv, 0] = int(joint_indices[j]) if joint_indices is not None else j + base_joint_idx
|
||||
out_w[v_off:v_off + Nv, 0] = 1.0
|
||||
out_c[v_off:v_off + Nv] = kp_colors[j]
|
||||
return _finalize_skinned_mesh(out_v, out_f, out_j, out_w, out_c, smooth_shade)
|
||||
@ -579,6 +583,24 @@ def _capsule_mesh_local(
|
||||
return v_arr, np.asarray(faces, dtype=np.uint32), weights
|
||||
|
||||
|
||||
def _scail_redirect_neck_stub(body_kp: np.ndarray) -> np.ndarray:
|
||||
"""Replace the nose keypoint (idx 0) of a (...,18,3) array with a short
|
||||
neck stub (0.6 spine + 0.4 neck→nose), matching the capsule render."""
|
||||
out = body_kp.copy()
|
||||
neck = body_kp[..., 1, :]
|
||||
nose = body_kp[..., 0, :]
|
||||
mid_hip = 0.5 * (body_kp[..., 8, :] + body_kp[..., 11, :])
|
||||
|
||||
def _unit(v):
|
||||
return v / np.linalg.norm(v, axis=-1, keepdims=True).clip(min=1e-6)
|
||||
|
||||
nose_vec = nose - neck
|
||||
nose_len = np.linalg.norm(nose_vec, axis=-1, keepdims=True)
|
||||
mixed = _unit(0.6 * _unit(neck - mid_hip) + 0.4 * _unit(nose_vec))
|
||||
out[..., 0, :] = neck + mixed * (nose_len * 0.5)
|
||||
return out
|
||||
|
||||
|
||||
def _openpose_limb_rest_trs(
|
||||
bind_kp_m: np.ndarray, pairs: Tuple[Tuple[int, int], ...],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
@ -636,6 +658,7 @@ def _build_openpose_sticks(
|
||||
limb_joint_base_idx: int = 0,
|
||||
shape: str = "ellipsoid",
|
||||
smooth_shade: bool = False,
|
||||
end_width_frac: float = 0.3,
|
||||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""Capsule (cylinder + hemispherical caps) per limb pair (a, b).
|
||||
|
||||
@ -682,7 +705,7 @@ def _build_openpose_sticks(
|
||||
half_width_eff = max(MIN_WIDTH, min(length * WIDTH_RATIO, half_width_m))
|
||||
|
||||
v_local, f_local, _weights_unused = _capsule_mesh_local(
|
||||
length, half_width_eff, shape=shape,
|
||||
length, half_width_eff, shape=shape, end_width_frac=end_width_frac,
|
||||
)
|
||||
v_world = v_local @ R.T + head
|
||||
Nv = v_local.shape[0]
|
||||
@ -729,13 +752,15 @@ def build_glb_openpose(
|
||||
hand_marker_radius_m: float = 0.0,
|
||||
hand_stick_radius_m: float = 0.0,
|
||||
hand_color_style: str = "dwpose",
|
||||
face_source: str = "off",
|
||||
face_style: str = "disabled",
|
||||
face_marker_radius_m: float = 0.0,
|
||||
palette: str = "openpose",
|
||||
shape: str = "ellipsoid",
|
||||
smooth_shade: bool = False,
|
||||
material_roughness: float = 0.85,
|
||||
material_double_sided: bool = False,
|
||||
stick_end_width_frac: float = 0.6,
|
||||
bone_smooth_window: int = 0,
|
||||
) -> bytes:
|
||||
"""Build a GLB containing an OpenPose-style 3D skeleton — sphere markers
|
||||
per keypoint plus rainbow-colored sticks between standard limb pairs.
|
||||
@ -757,9 +782,10 @@ def build_glb_openpose(
|
||||
rainbow per-finger sticks (controlnet_aux/dwpose convention);
|
||||
'openpose' = rainbow per-finger dots AND sticks (matches
|
||||
poseParameters.cpp::HAND_COLORS_RENDER).
|
||||
face_source: 'off' (default) | 'rig' — when 'rig', adds ~30 face
|
||||
contour landmarks sampled from `pred_vertices` at vertex IDs
|
||||
picked from `pose_data["canonical_colors"]["positions"]`.
|
||||
face_style: 'disabled' (default) | 'full' | 'eyes_mouth' — face
|
||||
landmarks sampled from `pred_vertices` at vertex IDs picked from
|
||||
`pose_data["canonical_colors"]["positions"]`. 'full' = all ~30
|
||||
contour points; 'eyes_mouth' = the eyes + outer-lip subset.
|
||||
face_marker_radius_m: per-face landmark sphere radius. 0 = auto =
|
||||
0.3 × `marker_radius_m` — face landmarks are densely packed
|
||||
around the eyes/mouth/jaw and need to be much smaller than
|
||||
@ -771,6 +797,12 @@ def build_glb_openpose(
|
||||
SCAIL-Pose style — warm hues right side, cool hues left side,
|
||||
grey neck-to-nose centerline, distinct per-limb colors.
|
||||
"""
|
||||
is_scail = str(palette) == "scail"
|
||||
# SCAIL drops the face bones (13..16) and eye/ear spheres; keeps nose (idx 0,
|
||||
# the neck-stub tip) to cap the open cylinder. Matches the capsule render.
|
||||
body_pairs = OPENPOSE_18_PAIRS[:13] if is_scail else OPENPOSE_18_PAIRS
|
||||
body_sphere_kp = (np.arange(14, dtype=np.int64)
|
||||
if is_scail else np.arange(18, dtype=np.int64))
|
||||
if str(palette) == "scail":
|
||||
body_sphere_colors = SCAIL_KEYPOINT_COLORS_18
|
||||
body_stick_colors = SCAIL_LIMB_COLORS_17
|
||||
@ -805,25 +837,30 @@ def build_glb_openpose(
|
||||
if not tracks:
|
||||
raise ValueError("build_glb_openpose: no valid tracks in pose_data")
|
||||
|
||||
# Eyes (6..13) + outer-lip ring (19..22) from FACE_LANDMARK_TARGETS.
|
||||
_EYES_MOUTH_IDX = np.array([6, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22], dtype=np.int64)
|
||||
face_vert_ids: Optional[np.ndarray] = None
|
||||
if face_source == "rig":
|
||||
face_target_idx = np.arange(len(FACE_LANDMARK_TARGETS), dtype=np.int64)
|
||||
if face_style in ("full", "eyes_mouth"):
|
||||
canonical_colors = pose_data.get("canonical_colors") or {}
|
||||
positions = canonical_colors.get("positions")
|
||||
if positions is None:
|
||||
raise ValueError(
|
||||
"build_glb_openpose: face_source='rig' needs "
|
||||
"build_glb_openpose: face_style needs "
|
||||
"pose_data['canonical_colors']['positions'] (computed at "
|
||||
"model load and attached by Predict). Ensure the SAM3DBody "
|
||||
"Loader+Predict ran upstream of this node."
|
||||
)
|
||||
if face_style == "eyes_mouth":
|
||||
face_target_idx = _EYES_MOUTH_IDX
|
||||
face_vert_ids = select_face_landmark_vert_ids(
|
||||
np.asarray(positions),
|
||||
face_mask=canonical_colors.get("face_mask"),
|
||||
)
|
||||
elif face_source != "off":
|
||||
)[face_target_idx]
|
||||
elif face_style != "disabled":
|
||||
raise ValueError(
|
||||
f"build_glb_openpose: unknown face_source={face_source!r} "
|
||||
"(expected 'off' or 'rig')"
|
||||
f"build_glb_openpose: unknown face_style={face_style!r} "
|
||||
"(expected 'disabled', 'full', or 'eyes_mouth')"
|
||||
)
|
||||
|
||||
K_body = 18
|
||||
@ -833,7 +870,7 @@ def build_glb_openpose(
|
||||
|
||||
# Limb counts: one joint per stick pair. Limb joints carry translation +
|
||||
# rotation so each capsule rotates rigidly with its limb (no LBS thinning).
|
||||
K_body_limbs = len(OPENPOSE_18_PAIRS)
|
||||
K_body_limbs = len(body_pairs)
|
||||
K_hand_limbs = len(OPENPOSE_HAND_PAIRS) if include_hands else 0
|
||||
K_limbs = K_body_limbs + 2 * K_hand_limbs # face has no sticks
|
||||
|
||||
@ -843,14 +880,14 @@ def build_glb_openpose(
|
||||
joint_names.extend([f"openpose_R_{n}" for n in OPENPOSE_HAND21_NAMES])
|
||||
joint_names.extend([f"openpose_L_{n}" for n in OPENPOSE_HAND21_NAMES])
|
||||
if K_face > 0:
|
||||
joint_names.extend([f"openpose_face_{name}"
|
||||
for name, _ in FACE_LANDMARK_TARGETS])
|
||||
joint_names.extend([f"openpose_face_{FACE_LANDMARK_TARGETS[i][0]}"
|
||||
for i in face_target_idx])
|
||||
|
||||
# Limb joint names, stacked body → R-hand → L-hand to match the limb
|
||||
# joint ordering in skin.joints (after the K keypoint joints).
|
||||
limb_names: List[str] = [
|
||||
f"openpose_limb_{OPENPOSE_18_NAMES[a]}_{OPENPOSE_18_NAMES[b]}"
|
||||
for (a, b) in OPENPOSE_18_PAIRS
|
||||
for (a, b) in body_pairs
|
||||
]
|
||||
if include_hands:
|
||||
for side in ("R", "L"):
|
||||
@ -882,6 +919,8 @@ def build_glb_openpose(
|
||||
seq_chunks.append(_extract_face_landmarks_from_verts(
|
||||
pose_data, frame_indices, person_k, face_vert_ids))
|
||||
kp_seq = np.concatenate(seq_chunks, axis=1) # (N, K, 3)
|
||||
if bone_smooth_window and bone_smooth_window > 1:
|
||||
kp_seq = gaussian_smooth_positions(kp_seq, int(bone_smooth_window))
|
||||
|
||||
# Static-bind = rig's REST pose when available (override path); else
|
||||
# fall back to frame 0 of the motion. The rest-pose bind makes the
|
||||
@ -896,6 +935,10 @@ def build_glb_openpose(
|
||||
bind_kp_m = (bind_kp_m_rest if bind_kp_m_rest is not None
|
||||
else kp_seq[0].astype(np.float32))
|
||||
|
||||
if is_scail: # nose → neck stub, matching the capsule render
|
||||
kp_seq[:, :K_body] = _scail_redirect_neck_stub(kp_seq[:, :K_body])
|
||||
bind_kp_m[:K_body] = _scail_redirect_neck_stub(bind_kp_m[:K_body])
|
||||
|
||||
person_root: Dict[str, Any] = {"name": f"track{track_i:02d}", "children": []}
|
||||
nodes.append(person_root)
|
||||
person_root_idx = len(nodes) - 1
|
||||
@ -920,8 +963,8 @@ def build_glb_openpose(
|
||||
limb_rest_axes_list: List[np.ndarray] = []
|
||||
limb_anim_mids_list: List[np.ndarray] = []
|
||||
limb_anim_quats_list: List[np.ndarray] = []
|
||||
rmid_b, raxis_b = _openpose_limb_rest_trs(bind_kp_m[:K_body], OPENPOSE_18_PAIRS)
|
||||
amid_b, aquat_b = _openpose_limb_anim_trs(kp_seq[:, :K_body], OPENPOSE_18_PAIRS, raxis_b)
|
||||
rmid_b, raxis_b = _openpose_limb_rest_trs(bind_kp_m[:K_body], body_pairs)
|
||||
amid_b, aquat_b = _openpose_limb_anim_trs(kp_seq[:, :K_body], body_pairs, raxis_b)
|
||||
limb_rest_mids_list.append(rmid_b)
|
||||
limb_rest_axes_list.append(raxis_b)
|
||||
limb_anim_mids_list.append(amid_b)
|
||||
@ -979,15 +1022,17 @@ def build_glb_openpose(
|
||||
group_meshes: List[Tuple[np.ndarray, np.ndarray, np.ndarray,
|
||||
np.ndarray, np.ndarray, np.ndarray]] = []
|
||||
sp = _build_openpose_spheres(
|
||||
bind_kp_m[:K_body], float(marker_radius_m),
|
||||
body_sphere_colors, base_joint_idx=0,
|
||||
bind_kp_m[body_sphere_kp], float(marker_radius_m),
|
||||
body_sphere_colors[body_sphere_kp], base_joint_idx=0,
|
||||
smooth_shade=smooth_shade,
|
||||
joint_indices=body_sphere_kp,
|
||||
)
|
||||
st = _build_openpose_sticks(
|
||||
bind_kp_m[:K_body], OPENPOSE_18_PAIRS, float(stick_radius_m),
|
||||
bind_kp_m[:K_body], body_pairs, float(stick_radius_m),
|
||||
body_stick_colors, limb_joint_base_idx=K, # body limbs start at K
|
||||
shape=shape,
|
||||
smooth_shade=smooth_shade,
|
||||
end_width_frac=stick_end_width_frac,
|
||||
)
|
||||
group_meshes.append(sp)
|
||||
group_meshes.append(st)
|
||||
@ -1012,6 +1057,7 @@ def build_glb_openpose(
|
||||
limb_joint_base_idx=K + K_body_limbs + hand_i * K_hand_limbs,
|
||||
shape=shape,
|
||||
smooth_shade=smooth_shade,
|
||||
end_width_frac=stick_end_width_frac,
|
||||
))
|
||||
|
||||
if K_face > 0:
|
||||
|
||||
@ -122,6 +122,30 @@ def gaussian_smooth_quats(q_seq: np.ndarray, window: int) -> np.ndarray:
|
||||
return out.astype(np.float32)
|
||||
|
||||
|
||||
def gaussian_smooth_positions(seq: np.ndarray, window: int) -> np.ndarray:
    """Gaussian-smooth a position sequence along its first (time) axis.

    Used to calm jittery keypoint tracks before the openpose rig derives
    sphere translations + limb TRS from them.

    Args:
        seq: array-like whose axis 0 is time; callers pass (N, K, 3).
        window: nominal kernel width in frames. The kernel spans
            ``2 * (window // 2) + 1`` taps with ``sigma = max(0.5, window / 4)``.
            ``window <= 1`` disables smoothing.

    Returns:
        float32 array with the same shape as ``seq``. When smoothing is
        skipped (``window <= 1`` or fewer than two frames) the input is
        returned as float32 without copying if it already has that dtype.
    """
    arr = np.asarray(seq)
    window = int(window)
    if window <= 1 or arr.shape[0] < 2:
        # Keep the return dtype identical on both paths.
        return arr.astype(np.float32, copy=False)

    # Accumulate in float64 so long windows do not lose precision.
    samples = arr.astype(np.float64)
    n_frames = samples.shape[0]
    half = window // 2
    sigma = max(0.5, window / 4.0)

    taps = np.arange(-half, half + 1, dtype=np.float64)
    kernel = np.exp(-taps * taps / (2.0 * sigma * sigma))
    kernel /= kernel.sum()

    # Edge-replicate padding along time only, so the ends are not pulled
    # towards zero.
    pad_spec = [(half, half)] + [(0, 0)] * (samples.ndim - 1)
    padded = np.pad(samples, pad_spec, mode="edge")

    smoothed = np.zeros_like(samples)
    for offset, weight in enumerate(kernel):
        smoothed += weight * padded[offset:offset + n_frames]
    return smoothed.astype(np.float32)
|
||||
|
||||
|
||||
def quat_sign_fix_per_joint(q_seq: np.ndarray) -> np.ndarray:
|
||||
"""Walk (N, NJ, 4) along time, flip sign whenever consecutive frames sit
|
||||
on opposite hemispheres. Eliminates long-path slerp glitches (mid-anim
|
||||
@ -900,19 +924,23 @@ def rotation_align(from_vec: np.ndarray, to_vec: np.ndarray) -> np.ndarray:
|
||||
|
||||
|
||||
def make_lit_material(
    roughness: float = 0.85, double_sided: bool = False, opacity: float = 1.0,
) -> dict:
    """Lit PBR material using vertex COLOR_0 multiplicatively. KHR_materials_unlit
    is intentionally off so viewer lighting reveals surface form. metallic=0
    keeps the surface dielectric so vertex colors stay readable. roughness=0.85
    suits dense rainbow body meshes; 0.3 matches SCAIL-Pose's glossy rig look.
    opacity < 1 switches to alpha-blend (e.g. see-through body mesh over bones).

    Args:
        roughness: written to ``roughnessFactor`` after clamping to [0, 1].
        double_sided: when true, ``doubleSided`` is set on the material.
        opacity: written to the alpha of ``baseColorFactor`` after clamping
            to [0, 1]; any value below 1 also sets ``alphaMode`` to "BLEND".

    Returns:
        A material dict with a ``pbrMetallicRoughness`` entry and, only when
        needed, ``alphaMode`` / ``doubleSided`` keys.
    """
    alpha = float(max(0.0, min(1.0, opacity)))
    rough = float(max(0.0, min(1.0, roughness)))
    mat = {
        "pbrMetallicRoughness": {
            "baseColorFactor": [1.0, 1.0, 1.0, alpha],
            "metallicFactor": 0.0,
            "roughnessFactor": rough,
        },
    }
    # Optional keys are omitted entirely rather than written with defaults.
    if alpha < 1.0:
        mat["alphaMode"] = "BLEND"
    if double_sided:
        mat["doubleSided"] = True
    return mat
|
||||
|
||||
@ -362,8 +362,10 @@ def build_glb_skeletal(
|
||||
"indices": indices_acc,
|
||||
"mode": 4,
|
||||
}
|
||||
if color_acc is not None:
|
||||
materials.append(make_lit_material())
|
||||
# See-through body when bones are shown, else opaque (only when a
|
||||
# vertex-color shader baked COLOR_0 — otherwise default material).
|
||||
if color_acc is not None or include_bones:
|
||||
materials.append(make_lit_material(opacity=0.35 if include_bones else 1.0))
|
||||
primitive["material"] = len(materials) - 1
|
||||
if expr_morph_accs:
|
||||
primitive["targets"] = [{"POSITION": a} for a in expr_morph_accs]
|
||||
|
||||
@ -105,16 +105,85 @@ def cam_int_from_moge(moge_geometry, height: int, width: int) -> Optional[torch.
|
||||
)
|
||||
|
||||
|
||||
def run_batched_single_chunk(
|
||||
inner: SAM3DBody,
|
||||
frames_rgb: List[torch.Tensor],
|
||||
per_frame_boxes: List[torch.Tensor],
|
||||
per_frame_masks: Optional[List[torch.Tensor]],
|
||||
image_size: Tuple[int, int],
|
||||
inference_type: str,
|
||||
K: int,
|
||||
cam_int: Optional[torch.Tensor] = None,
|
||||
) -> List[List[Dict[str, Any]]]:
|
||||
def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str, Any],
                          H: int, W: int, fov_deg: float = 0.0) -> Dict[str, Any]:
    """Re-project every frame's pose through a Load3D 6DOF camera (position/
    target/zoom + optional FOV).

    Args:
        mhr_pose_data: dict with a "frames" list; each frame is a list of
            per-person dicts. Persons without "pred_cam_t" are passed through.
        camera_info: dict with optional "position" / "target" ({x, y, z}) and
            "zoom" entries. Missing entries fall back to the defaults below.
        H: image height in pixels (used for the FOV-derived focal and the
            principal point).
        W: image width in pixels (used for the principal point).
        fov_deg: vertical field of view in degrees; > 0 overrides the lens,
            0 keeps the focal length stored on the first person of frame 0.

    Returns:
        A new mhr_pose_data dict whose "frames" hold shallow-copied person
        dicts with the 3D arrays expressed in the new camera frame, 2D
        keypoints re-projected, "pred_cam_t" zeroed and "focal_length"
        replaced. The input is returned unchanged when there are no frames,
        the first frame is empty, no person in it has "pred_cam_t",
        camera_info is None, or the camera position coincides with its target.
    """
    frames = (mhr_pose_data or {}).get("frames") or []
    first_frame = frames[0] if frames else []
    if not first_frame or camera_info is None:
        return mhr_pose_data
    # GLB exports the rig root at origin, so Load3D coords are root-relative
    roots = [np.asarray(p["pred_cam_t"], dtype=np.float32).reshape(3)
             for p in first_frame if p.get("pred_cam_t") is not None]
    if not roots:
        return mhr_pose_data
    subj_center = np.mean(np.stack(roots, axis=0), axis=0)

    # Meter-scale, so Three.js coords map 1:1 (Three.js Y-up → flip Y,Z)
    pos = camera_info.get("position") or {}
    tgt = camera_info.get("target") or {}
    pos_v = np.array([float(pos.get("x", 0.0)), -float(pos.get("y", 5.0)), -float(pos.get("z", 0.0))],
                     dtype=np.float32)
    tgt_v = np.array([float(tgt.get("x", 0.0)), -float(tgt.get("y", 0.0)), -float(tgt.get("z", 0.0))],
                     dtype=np.float32)
    offset = pos_v - tgt_v
    offset_len = float(np.linalg.norm(offset))
    if offset_len < 1e-6:
        return mhr_pose_data

    # A missing, None or zero zoom all mean "no zoom" (1.0).
    zoom = float(camera_info.get("zoom") or 1.0) or 1.0
    target = subj_center + tgt_v
    eye = target + offset / max(0.01, zoom)

    # Look-at basis. z = -offset (already non-zero); x degenerates only when
    # looking straight along world-up, then fall back to world +X.
    z_axis = -offset / offset_len
    x_axis = np.cross(z_axis, np.array([0.0, -1.0, 0.0], dtype=np.float32))
    x_norm = float(np.linalg.norm(x_axis))
    x_axis = x_axis / x_norm if x_norm > 1e-6 else np.array([1.0, 0.0, 0.0], dtype=np.float32)
    y_axis = np.cross(z_axis, x_axis)
    R = np.stack([x_axis, y_axis, z_axis], axis=0).astype(np.float32)
    R_T = R.T  # loop-invariant; points are row vectors, so multiply by R^T

    # fov_deg > 0 overrides the lens; 0 keeps the SAM3D predicted focal so only
    # the viewpoint changes. Three.js fov is vertical → focal from image height.
    if fov_deg > 0:
        new_focal = float(H) / (2.0 * float(np.tan(np.deg2rad(fov_deg) / 2.0)))
    else:
        f0 = first_frame[0].get("focal_length")
        new_focal = (float(np.asarray(f0, dtype=np.float32).reshape(-1)[0]) if f0 is not None
                     else float(H) / (2.0 * float(np.tan(np.deg2rad(50.0) / 2.0))))

    center = np.array([W * 0.5, H * 0.5], dtype=np.float32)
    reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"}
    new_frames: List[List[Dict[str, Any]]] = []
    for frame in frames:
        scaled = []
        for p in frame:
            p = dict(p)  # shallow copy so the caller's person dict is not mutated
            cam_t = p.get("pred_cam_t")
            if cam_t is None:
                scaled.append(p)
                continue
            cam_t = np.asarray(cam_t, dtype=np.float32).reshape(3)
            for k in ("pred_keypoints_3d", "pred_vertices", "pred_face_keypoints_3d"):
                v = p.get(k)
                if v is None:
                    continue
                cam = (np.asarray(v, dtype=np.float32) + cam_t - eye) @ R_T
                p[k] = cam.astype(np.float32)
                if k in reproj:  # re-project the new 3D to 2D image coords
                    # Clamp depth so points at/behind the camera do not divide by <= 0.
                    z = np.maximum(cam[..., 2:3], 1e-6)
                    p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32)
            # Translation is now baked into the 3D arrays.
            p["pred_cam_t"] = np.zeros(3, dtype=np.float32)
            p["focal_length"] = np.array(new_focal, dtype=np.float32)
            scaled.append(p)
        new_frames.append(scaled)
    out = dict(mhr_pose_data)
    out["frames"] = new_frames
    return out
|
||||
|
||||
|
||||
def run_batched_single_chunk(inner: SAM3DBody, frames_rgb: List[torch.Tensor], per_frame_boxes: List[torch.Tensor],
|
||||
per_frame_masks: Optional[List[torch.Tensor]], image_size: Tuple[int, int], inference_type: str, K: int,
|
||||
cam_int: Optional[torch.Tensor] = None) -> List[List[Dict[str, Any]]]:
|
||||
"""Run a SINGLE chunk of frames through run_inference in one forward."""
|
||||
N = len(frames_rgb)
|
||||
total = N * K
|
||||
|
||||
Loading…
Reference in New Issue
Block a user