Camera info to render nodes, fixes and tweaks

2026-07-14 10:27:17 +08:00 · 2026-05-28 01:47:00 +03:00 · 2026-05-28 01:47:00 +03:00 · 8acb162c31
commit 8acb162c31
parent 828016a274
7 changed files with 373 additions and 91 deletions
--- a/comfy_extras/nodes_sam3d_body.py
+++ b/comfy_extras/nodes_sam3d_body.py
@ -17,6 +17,7 @@ import folder_paths
 from comfy.ldm.sam3d_body.model.model import SAM3DBody
 from comfy.ldm.sam3d_body.model.dinov3 import apply_dinov3_qkv_bias_mask
 from comfy_extras.sam3d_body.utils import (
+        apply_camera_override,
        cam_int_from_fov,
        cam_int_from_moge,
        inputs_from_sam3_track,
@ -99,6 +100,32 @@ class SAM3DBody_Loader(io.ComfyNode):

 # Predict

+def _per_frame_bboxes_from_detections(bboxes, B: int):
+    # Normalize a BoundingBox payload (RT-DETR etc.): dict | list[dict] | list[list[dict]] into a list of B (N, 4) float32 tensors; returns None for an empty payload.
+    if isinstance(bboxes, dict):
+        norm = [[bboxes]]  # single detection → one frame holding one box
+    elif not bboxes:
+        return None  # empty payload: caller decides on the fallback
+    elif isinstance(bboxes[0], dict):
+        norm = [bboxes]  # flat list → same detections every frame
+    else:
+        norm = list(bboxes)  # already nested: one list of detections per frame
+    if len(norm) == 1:  # one frame's worth (dict, flat list, or 1-entry nested list) is repeated for all B frames
+        norm = norm * B
+    norm = (norm + [[]] * B)[:B]  # pad with empty frames, then truncate to exactly B entries
+    out = []
+    for frame in norm:
+        if frame:
+            boxes = torch.tensor(
+                [[d["x"], d["y"], d["x"] + d["width"], d["y"] + d["height"]] for d in frame],  # (x, y, width, height) → (x1, y1, x2, y2)
+                dtype=torch.float32,
+            )
+        else:
+            boxes = torch.zeros((0, 4), dtype=torch.float32)  # frame without detections keeps the (N, 4) layout with N = 0
+        out.append(boxes)
+    return out
+
+
 class SAM3DBody_Predict(io.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -113,6 +140,14 @@ class SAM3DBody_Predict(io.ComfyNode):
                    "sam3_track_data", optional=True,
                    tooltip=("Output of SAM3 Video Track, required for multi-person detection"),
                ),
+                io.BoundingBox.Input(
+                    "bboxes", optional=True, force_input=True,
+                    tooltip=(
+                        "Per-frame person boxes (e.g. RT-DETR Detect with class_name='person'). "
+                        "Used when no SAM3 track is wired — gives the top-down model a tight, "
+                        "person-centered crop. Multi-person supported (one box = one person)."
+                    ),
+                ),
                io.Boolean.Input(
                    "run_hand_refinement", default=True,
                    tooltip="Improves hand pose at the cost of extra inference time and memory use"),
@ -146,19 +181,22 @@ class SAM3DBody_Predict(io.ComfyNode):
        )

    @classmethod
-    def execute(cls, sam3d_body_model, image, sam3_track_data=None, run_hand_refinement=True, fov_degrees=0.0, moge_geometry=None, chunk_size=144) -> io.NodeOutput:
+    def execute(cls, sam3d_body_model, image, sam3_track_data=None, bboxes=None, run_hand_refinement=True, fov_degrees=0.0, moge_geometry=None, chunk_size=64) -> io.NodeOutput:
        comfy.model_management.load_model_gpu(sam3d_body_model)
        inner: SAM3DBody = sam3d_body_model.model

        B, H, W, _ = image.shape
        image_size = getattr(inner, "_sam3d_image_size", (512, 512))

+        # Precedence: SAM3 track (masks + boxes) > detector boxes > full-frame fallback.
        per_frame_bboxes, per_frame_masks = (None, None)
        if sam3_track_data is not None:
            per_frame_bboxes, per_frame_masks = inputs_from_sam3_track(sam3_track_data, B, H, W)
+        if per_frame_bboxes is None and bboxes:
+            per_frame_bboxes = _per_frame_bboxes_from_detections(bboxes, B)
+            per_frame_masks = None
        if per_frame_bboxes is None:
-            # No track wired (or empty / frame count mismatch) — single-person
-            # full-frame fallback. Multi-person scenes need SAM3 Video Track.
+            # No track or detector boxes — single-person full-frame fallback.
            full_frame_bbox = torch.tensor([[0.0, 0.0, float(W), float(H)]], dtype=torch.float32)
            per_frame_bboxes = [full_frame_bbox.clone() for _ in range(B)]
            per_frame_masks = None
@ -711,6 +749,26 @@ def _render_capsules_mode_inputs():
    ]


+def _render_openpose3d_mode_inputs():
+    return [  # sub-inputs attached to the "openpose_3d" option of the render_style DynamicCombo
+        io.Float.Input(  # read back in SAM3DBody_Render.execute() via render_style.get("radius_m", 0.015)
+            "radius_m", default=0.015, min=0.004, max=0.1, step=0.001,
+            tooltip="Limb capsule radius in meters (thin = stick-like).",
+        ),
+        io.Boolean.Input(  # read back via render_style.get("include_hands", True)
+            "include_hands", default=True,
+            tooltip="Draw 21+21 hand keypoints as 3D capsules.",
+        ),
+        io.Float.Input(  # same key and default (0.6) that the openpose_2d branch of execute() reads
+            "person_palette_falloff", default=0.6, min=0.1, max=1.0, step=0.05,
+            tooltip=(
+                "Per-person desaturation: track k blends toward white by "
+                "1 - falloff^k. Track 0 stays vivid; 1.0 disables falloff."
+            ),
+        ),
+    ]
+
+
 def _render_openpose_mode_inputs():
    return [
        io.Int.Input(
@ -755,15 +813,8 @@ def _render_openpose_mode_inputs():


 def _scale_pose_data(mhr_pose_data: Dict[str, Any], new_H: int, new_W: int) -> Dict[str, Any]:
-    """Rescale per-person camera intrinsics + 2D coords to a new canvas size.
-    Pose data records focal_length in pixels of the original image; without
-    scaling, the FOV would change and subjects would be cropped/zoomed.
-
-    When the new aspect differs from the original, the body (3D-projected
-    through focal_length on a centered principal point) lands in a
-    letterboxed region of the new canvas. 2D-prestored coords must follow
-    the same uniform scale + center offset so face/hand overlays align with
-    the body — per-axis stretching would split them apart."""
+    # 2D coords must match the body's letterbox transform (uniform scale +
+    # center offset), else face/hand overlays drift off the body.
    old_H, old_W = mhr_pose_data["image_size"]
    if new_H == old_H and new_W == old_W:
        return mhr_pose_data
@ -831,20 +882,38 @@ class SAM3DBody_Render(io.ComfyNode):
                        "other is derived preserving the original aspect."
                    ),
                ),
+                io.Load3DCamera.Input(
+                    "camera_info", optional=True,
+                    tooltip=(
+                        "Free 6DOF camera override. When wired, the pose is re-projected through this camera "
+                        "(position/target/zoom) instead of the predicted one. "
+                    ),
+                ),
+                io.Float.Input(
+                    "camera_fov", default=0.0, min=0.0, max=170.0, step=0.5, advanced=True,
+                    tooltip=(
+                        "Vertical FOV for the camera_info override. 0 = keep the SAM3D "
+                        "predicted camera's FOV (only the viewpoint changes). Any non-zero "
+                        "value overrides the lens. Ignored when camera_info is unwired."
+                    ),
+                ),
                io.DynamicCombo.Input(
                    "render_style",
                    options=[
                        io.DynamicCombo.Option("mesh", _render_mesh_mode_inputs()),
                        io.DynamicCombo.Option("silhouette", []),
-                        io.DynamicCombo.Option("openpose", _render_openpose_mode_inputs()),
+                        io.DynamicCombo.Option("openpose_2d", _render_openpose_mode_inputs()),
+                        io.DynamicCombo.Option("openpose_3d", _render_openpose3d_mode_inputs()),
                        io.DynamicCombo.Option("scail", _render_capsules_mode_inputs()),
                    ],
                    tooltip=(
                        "'mesh' = 3D MHR mesh rasterized through the camera. "
                        "'silhouette' = binary mask of the mesh (white-on-black, "
-                        "background ignored). 'openpose' = flat 2D skeleton "
-                        "from pred_keypoints_2d (DWPose look). 'scail' = SCAIL "
-                        "3D capsules via torch SDF ray-march (proper occlusion / depth)."
+                        "background ignored). 'openpose_2d' = flat 2D skeleton "
+                        "from pred_keypoints_2d (DWPose look, ControlNet-ready). "
+                        "'openpose_3d' = same skeleton as flat-shaded 3D capsules "
+                        "(camera-aware, proper depth). 'scail' = SCAIL 3D capsules "
+                        "via torch SDF ray-march (proper occlusion / depth)."
                    ),
                ),
            ],
@ -853,7 +922,7 @@ class SAM3DBody_Render(io.ComfyNode):


    @classmethod
-    def execute(cls, mhr_pose_data, background=None, width=0, height=0, render_style=None) -> io.NodeOutput:
+    def execute(cls, mhr_pose_data, background=None, width=0, height=0, camera_info=None, camera_fov=0.0, render_style=None) -> io.NodeOutput:
        render_style = render_style or {"render_style": "mesh"}
        mode_key = render_style.get("render_style", "mesh")

@ -869,10 +938,11 @@ class SAM3DBody_Render(io.ComfyNode):
                new_H = max(1, round(native_H * new_W / native_W))
            mhr_pose_data = _scale_pose_data(mhr_pose_data, new_H, new_W)
            H, W = new_H, new_W
-            # Marker/stick px constants are authored for native resolution —
-            # scale them so the openpose overlay reads at the same relative size.
            px_scale = min(new_W / native_W, new_H / native_H)

+        if camera_info is not None:
+            mhr_pose_data = apply_camera_override(mhr_pose_data, camera_info, H, W, fov_deg=float(camera_fov))
+
        B = len(mhr_pose_data["frames"])
        if B == 0:
            return io.NodeOutput(torch.zeros(1, H, W, 3, dtype=torch.float32))
@ -880,6 +950,8 @@ class SAM3DBody_Render(io.ComfyNode):
        out_device = comfy.model_management.intermediate_device()
        bg_t = None if background is None else background.to(device=out_device, dtype=torch.float32)

+        if bg_t is not None and tuple(bg_t.shape[1:3]) != (H, W): # Match the background to the render resolution
+            bg_t = comfy.utils.common_upscale(bg_t.movedim(-1, 1), W, H, "bilinear", "disabled").movedim(1, -1)

        if mode_key == "silhouette":
            composite = "silhouette"
@ -888,7 +960,7 @@ class SAM3DBody_Render(io.ComfyNode):
        else:
            composite = "mesh_only"

-        if mode_key == "openpose":
+        if mode_key == "openpose_2d":
            marker_radius_px = max(1, int(round(render_style.get("marker_radius_px", 4) * px_scale)))
            stick_width_px = max(1, int(round(render_style.get("stick_width_px", 4) * px_scale)))
            limb_alpha = float(render_style.get("limb_alpha", 0.6))
@ -897,6 +969,10 @@ class SAM3DBody_Render(io.ComfyNode):
            include_hands = hand_style != "disabled"
            hand_color_style = hand_style if include_hands else "dwpose"
            person_palette_falloff = float(render_style.get("person_palette_falloff", 0.6))
+        elif mode_key == "openpose_3d":
+            op3d_radius_m = float(render_style.get("radius_m", 0.015))
+            op3d_include_hands = bool(render_style.get("include_hands", True))
+            person_palette_falloff = float(render_style.get("person_palette_falloff", 0.6))
        elif mode_key == "scail":
            cap_radius_m = float(render_style.get("radius_m", 0.030))
            cap_hand_style = str(render_style.get("hand_style", "disabled"))
@ -931,7 +1007,8 @@ class SAM3DBody_Render(io.ComfyNode):
        frames_out = []
        pbar = comfy.utils.ProgressBar(B)
        desc = (
-            "SAM3D openpose-2D render" if mode_key == "openpose"
+            "SAM3D openpose-2D render" if mode_key == "openpose_2d"
+            else "SAM3D openpose-3D render" if mode_key == "openpose_3d"
            else "SAM3D SCAIL-3D render" if mode_key == "scail"
            else "SAM3D silhouette" if mode_key == "silhouette"
            else "SAM3D render"
@ -940,7 +1017,7 @@ class SAM3DBody_Render(io.ComfyNode):
            bg_f = None
            if bg_t is not None:
                bg_f = bg_t[min(f, bg_t.shape[0] - 1)]
-            if mode_key == "openpose":
+            if mode_key == "openpose_2d":
                img = render_pose_data_openpose(
                    mhr_pose_data, frame_idx=f, W=W, H=H,
                    background=bg_f,
@ -953,6 +1030,17 @@ class SAM3DBody_Render(io.ComfyNode):
                    hand_color_style=hand_color_style,
                    person_brightness_falloff=person_palette_falloff,
                )
+            elif mode_key == "openpose_3d":
+                img = render_pose_data_capsules(
+                    mhr_pose_data, frame_idx=f, W=W, H=H,
+                    background=bg_f,
+                    composite=composite,
+                    radius_m=op3d_radius_m,
+                    include_hands=op3d_include_hands,
+                    palette="openpose",
+                    flat_shade=True,
+                    person_brightness_falloff=person_palette_falloff,
+                )
            elif mode_key == "scail":
                # SCAIL renders body as 3D capsules + 2D openpose hands on top
                img = render_pose_data_capsules(
--- a/comfy_extras/nodes_save_3d.py
+++ b/comfy_extras/nodes_save_3d.py
@ -449,7 +449,7 @@ class BuildPoseGLB(IO.ComfyNode):
                                    IO.DynamicCombo.Option("octahedrons", [
                                        IO.Float.Input(
                                            "bone_vis_radius_m",
-                                            default=0.02, min=0.005, max=0.5, step=0.005,
+                                            default=0.02, min=0.005, max=0.5, step=0.005, advanced=True,
                                            tooltip="Radius in m (sphere radius / octahedron half-width).",
                                        ),
                                        IO.Combo.Input(
@ -527,7 +527,7 @@ class BuildPoseGLB(IO.ComfyNode):
                                    IO.DynamicCombo.Option("octahedrons", [
                                        IO.Float.Input(
                                            "bone_vis_radius_m",
-                                            default=0.02, min=0.005, max=0.5, step=0.005,
+                                            default=0.02, min=0.005, max=0.5, step=0.005, advanced=True,
                                            tooltip="Radius in m (sphere radius / octahedron half-width).",
                                        ),
                                        IO.Combo.Input(
@ -557,12 +557,20 @@ class BuildPoseGLB(IO.ComfyNode):
                            ),
                        ]),
                        IO.DynamicCombo.Option("openpose", [
+                            IO.Int.Input(
+                                "bone_smooth_window",
+                                default=0, min=0, max=51, step=2,
+                                tooltip=(
+                                    "Gaussian window on keypoint tracks. 0 = off. "
+                                    "7-15 calms jitter where upstream Smooth misses spikes."
+                                ),
+                            ),
                            IO.Float.Input(
-                                "marker_radius_m", default=0.010, min=0.005, max=0.1, step=0.001,
+                                "marker_radius_m", default=0.010, min=0.005, max=0.1, step=0.001, advanced=True,
                                tooltip="Sphere radius in m.",
                            ),
                            IO.Float.Input(
-                                "stick_radius_m", default=0.008, min=0.002, max=0.05, step=0.001,
+                                "stick_radius_m", default=0.008, min=0.002, max=0.05, step=0.001, advanced=True,
                                tooltip="Limb half-width in m. Auto-clamped to bone_length x 0.1.",
                            ),
                            IO.Boolean.Input(
@ -573,31 +581,39 @@ class BuildPoseGLB(IO.ComfyNode):
                                ),
                            ),
                            IO.Float.Input(
-                                "hand_marker_radius_m", default=0.005, min=0.001, max=0.1, step=0.001,
+                                "hand_marker_radius_m", default=0.005, min=0.001, max=0.1, step=0.001, advanced=True,
                                tooltip="Hand sphere radius in m.",
                            ),
                            IO.Float.Input(
-                                "hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001,
+                                "hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001, advanced=True,
                                tooltip="Hand limb half-width in m.",
                            ),
                            IO.Combo.Input(
-                                "face_source",
-                                options=["off", "rig"],
-                                default="off",
+                                "face_style",
+                                options=["disabled", "full", "eyes_mouth"],
+                                default="disabled",
                                tooltip=(
-                                    "'rig' adds ~30 face-contour landmarks sampled from pred_vertices "
-                                    "at fixed head-mesh vertex IDs (brow/eyes/nose/mouth/jaw); needs "
-                                    "canonical_colors on pose_data."
+                                    "Face-contour landmarks sampled from pred_vertices at fixed "
+                                    "head-mesh vertex IDs (needs canonical_colors on pose_data). "
+                                    "'full' = all ~30 points; 'eyes_mouth' = eyes + outer lips only."
                                ),
                            ),
                            IO.Float.Input(
-                                "face_marker_radius_m", default=0.0, min=0.0, max=0.05, step=0.0005,
+                                "face_marker_radius_m", default=0.0, min=0.0, max=0.05, step=0.0005, advanced=True,
                                tooltip="Face dot radius. 0 = auto = 0.3 x marker_radius_m.",
                            ),
                        ]),
                        IO.DynamicCombo.Option("scail", [
+                            IO.Int.Input(
+                                "bone_smooth_window",
+                                default=0, min=0, max=51, step=2,
+                                tooltip=(
+                                    "Gaussian window on keypoint tracks. 0 = off. "
+                                    "7-15 calms jitter where upstream Smooth misses spikes."
+                                ),
+                            ),
                            IO.Float.Input(
-                                "stick_radius_m", default=0.022, min=0.002, max=0.1, step=0.001,
+                                "stick_radius_m", default=0.022, min=0.002, max=0.1, step=0.001, advanced=True,
                                tooltip=(
                                    "Cylinder radius in m. Bones are open cylinders at constant "
                                    "radius; joint spheres (auto-sized to match) cap the open ends. "
@ -605,11 +621,11 @@ class BuildPoseGLB(IO.ComfyNode):
                                ),
                            ),
                            IO.Float.Input(
-                                "marker_radius_m", default=0.0, min=0.0, max=0.1, step=0.001,
+                                "marker_radius_m", default=0.0, min=0.0, max=0.1, step=0.001, advanced=True,
                                tooltip="Joint sphere radius. 0 = auto = stick_radius_m (flush cap).",
                            ),
                            IO.Float.Input(
-                                "material_roughness", default=0.3, min=0.0, max=1.0, step=0.05,
+                                "material_roughness", default=0.3, min=0.0, max=1.0, step=0.05, advanced=True,
                                tooltip="PBR roughness. SCAIL ref = 0.3. 1 = matte; 0 = chrome.",
                            ),
                            IO.Boolean.Input(
@ -617,13 +633,23 @@ class BuildPoseGLB(IO.ComfyNode):
                                tooltip="Append 21+21 hand keypoints + capsule sticks per track.",
                            ),
                            IO.Float.Input(
-                                "hand_marker_radius_m", default=0.005, min=0.001, max=0.05, step=0.001,
+                                "hand_marker_radius_m", default=0.005, min=0.001, max=0.05, step=0.001, advanced=True,
                                tooltip="Hand sphere radius in m.",
                            ),
                            IO.Float.Input(
-                                "hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001,
+                                "hand_stick_radius_m", default=0.003, min=0.001, max=0.05, step=0.001, advanced=True,
                                tooltip="Hand cylinder radius in m.",
                            ),
+                            IO.Combo.Input(
+                                "face_style",
+                                options=["disabled", "full", "eyes_mouth"],
+                                default="disabled",
+                                tooltip=(
+                                    "Face-contour landmarks sampled from pred_vertices (needs "
+                                    "canonical_colors on pose_data). 'full' = all ~30 points; "
+                                    "'eyes_mouth' = eyes + outer lips only."
+                                ),
+                            ),
                        ]),
                    ],
                    tooltip=(
@ -710,10 +736,11 @@ class BuildPoseGLB(IO.ComfyNode):
                include_hands=bool(mesh_style.get("include_hands", False)),
                hand_marker_radius_m=float(mesh_style.get("hand_marker_radius_m", 0.005)),
                hand_stick_radius_m=float(mesh_style.get("hand_stick_radius_m", 0.003)),
-                face_source=str(mesh_style.get("face_source", "off")),
+                face_style=str(mesh_style.get("face_style", "disabled")),
                face_marker_radius_m=float(mesh_style.get("face_marker_radius_m", 0.0)),
                palette="openpose",
                shape="ellipsoid",
+                bone_smooth_window=int(mesh_style.get("bone_smooth_window", 0)),
            )
        elif mode_key == "scail":
            # SCAIL rig: open cylinders capped flush by joint spheres (sphere
@ -732,7 +759,7 @@ class BuildPoseGLB(IO.ComfyNode):
                include_hands=bool(mesh_style.get("include_hands", False)),
                hand_marker_radius_m=float(mesh_style.get("hand_marker_radius_m", 0.005)),
                hand_stick_radius_m=float(mesh_style.get("hand_stick_radius_m", 0.003)),
-                face_source="off",
+                face_style=str(mesh_style.get("face_style", "disabled")),
                palette="scail",
                shape="capsule",
                smooth_shade=True,
@ -740,6 +767,7 @@ class BuildPoseGLB(IO.ComfyNode):
                # inside of the open cylinders shades sensibly at grazing angles.
                material_roughness=float(mesh_style.get("material_roughness", 0.3)),
                material_double_sided=True,
+                bone_smooth_window=int(mesh_style.get("bone_smooth_window", 0)),
            )
        else:
            raise ValueError(f"BuildPoseGLB: unknown mesh_style {mode_key!r}")
--- a/comfy_extras/sam3d_body/export/capsules.py
+++ b/comfy_extras/sam3d_body/export/capsules.py
@ -41,10 +41,11 @@ def _build_specs_from_pose(
    include_hands: bool,
    palette: str,
    person_brightness_falloff: float = 0.0,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Flatten body + optional hand limbs for one frame into
-    (starts, ends, colors_rgba) in camera coords (Y-down, +Z forward).
-    Drops endpoints that are non-finite or behind the camera.
+    (starts, ends, colors_rgba, is_hand) in camera coords (Y-down, +Z forward).
+    Drops endpoints that are non-finite or behind the camera. `is_hand` flags
+    the hand limbs so the renderer can draw them thinner.

    `person_brightness_falloff` mixes each per-person limb color toward white
    by `1 - falloff^k` for track index `k` (track 0 stays vivid). Matches the
@ -52,6 +53,7 @@ def _build_specs_from_pose(
    starts: List[np.ndarray] = []
    ends: List[np.ndarray] = []
    colors: List[np.ndarray] = []
+    is_hand: List[bool] = []

    body_limb_colors = _limb_palette_rgb01(palette)
    hand_limb_colors = OPENPOSE_HAND_COLORS_21.astype(np.float32)
@ -109,6 +111,7 @@ def _build_specs_from_pose(
                    sb = sa + spine_dir * (sd_len * 0.3)
            starts.append(sa)
            ends.append(sb)
+            is_hand.append(False)
            color_rgb = _tint(body_limb_colors[limb_i])
            colors.append(np.array([color_rgb[0], color_rgb[1], color_rgb[2], 1.0],
                                   dtype=np.float32))
@ -125,6 +128,7 @@ def _build_specs_from_pose(
                        continue
                    starts.append(sa)
                    ends.append(sb)
+                    is_hand.append(True)
                    color_rgb = _tint(hand_limb_colors[(a + b) % len(hand_limb_colors)])
                    colors.append(np.array([color_rgb[0], color_rgb[1], color_rgb[2], 1.0],
                                           dtype=np.float32))
@ -132,10 +136,12 @@ def _build_specs_from_pose(
    if not starts:
        return (np.zeros((0, 3), dtype=np.float32),
                np.zeros((0, 3), dtype=np.float32),
-                np.zeros((0, 4), dtype=np.float32))
+                np.zeros((0, 4), dtype=np.float32),
+                np.zeros((0,), dtype=bool))
    return (np.stack(starts).astype(np.float32),
            np.stack(ends).astype(np.float32),
-            np.stack(colors).astype(np.float32))
+            np.stack(colors).astype(np.float32),
+            np.asarray(is_hand, dtype=bool))


 def _ray_capsule_t(
@ -144,14 +150,14 @@ def _ray_capsule_t(
    ends: torch.Tensor,        # (M, 3)
    ba_norm: torch.Tensor,     # (M, 3) unit axis (A → B)
    ba_len: torch.Tensor,      # (M,) segment length
-    radius: float,
+    radius: torch.Tensor,      # (M,) per-capsule radius
 ) -> torch.Tensor:
    """Closed-form ray-capsule intersection. Returns (K, M) tensor of ray
    parameters t to the nearest valid hit per capsule, +inf where the ray
    misses. A capsule is the union of (cylinder body, hemisphere at A,
    hemisphere at B); each component is a quadratic root-find."""
    INF = float("inf")
-    r_sq = float(radius) * float(radius)
+    r_sq = radius * radius                      # (M,)

    # Cached dot products.
    dn = ray_dirs @ ba_norm.transpose(0, 1)     # (K, M) — d·n
@ -199,9 +205,10 @@ def _render_capsules_torch(
    colors: torch.Tensor,
    H: int, W: int,
    fx: float, fy: float, cx: float, cy: float,
-    radius: float,
+    radius: torch.Tensor,     # scalar or (M,) per-capsule radius
    background_rgb: Optional[torch.Tensor],
    device: torch.device,
+    flat_shade: bool = False,
 ) -> torch.Tensor:
    """Analytic ray-capsule renderer for a union of capsules. Camera at
    origin looking down +Z; pixels in y-down screen coords."""
@ -224,12 +231,16 @@ def _render_capsules_torch(
    flat_dirs = ray_dirs.view(-1, 3)
    N = flat_dirs.shape[0]

+    radius = torch.as_tensor(radius, device=device, dtype=torch.float32)
+    if radius.ndim == 0:
+        radius = radius.expand(M)
+
    ba = ends - starts
    ba_len = torch.linalg.norm(ba, dim=1).clamp(min=1e-6)
    ba_norm = ba / ba_len.unsqueeze(1)

    z_min = float(min(starts[:, 2].min().item(), ends[:, 2].min().item()))
-    z_near = max(0.05, z_min - radius)
+    z_near = max(0.05, z_min - float(radius.max().item()))

    # Union of per-capsule screen-space bboxes. Pixels outside this mask
    # provably can't hit any capsule, so the analytic intersection only runs
@ -298,6 +309,10 @@ def _render_capsules_torch(
        normals = normals / normals.norm(dim=-1, keepdim=True).clamp(min=1e-8)

        col = colors[m_h, :3]
+        if flat_shade:
+            # Solid per-limb color (OpenPose look) — no lighting/depth modulation.
+            out[hit_idx] = col
+            return out.view(H, W, 3).clamp(0.0, 1.0)
        # SCAIL Blinn-Phong (render_torch.py:290-331). Headlight: light = +Z.
        diff = torch.clamp(-(normals[:, 2]), min=0.0)
        diffuse = 0.45 + 0.55 * diff
@ -336,6 +351,8 @@ def render_pose_data_capsules(
    include_hands: bool = False,
    palette: str = "scail",
    person_brightness_falloff: float = 0.0,
+    flat_shade: bool = False,
+    hand_radius_scale: float = 0.4,
    device: Optional[torch.device] = None,
 ) -> torch.Tensor:
    """Render a frame's pose_data as 3D capsules projected through the per-
@ -345,7 +362,8 @@ def render_pose_data_capsules(
    `composite='mesh_only'` always uses a black canvas.

    `radius_m` is in METERS (matching `pred_keypoints_3d` / `pred_cam_t`).
-    Camera fx/fy come from each person's `focal_length` (pixels); cx/cy = center.
+    Hand limbs use `radius_m * hand_radius_scale` (their bones are far shorter
+    than body limbs). Camera fx/fy come from each person's `focal_length`.
    """
    persons = pose_data["frames"][frame_idx]
    if device is None:
@ -361,7 +379,7 @@ def render_pose_data_capsules(
        break
    cx, cy = W * 0.5, H * 0.5

-    starts_np, ends_np, colors_np = _build_specs_from_pose(
+    starts_np, ends_np, colors_np, is_hand_np = _build_specs_from_pose(
        persons, include_hands=include_hands, palette=palette,
        person_brightness_falloff=person_brightness_falloff,
    )
@ -384,11 +402,14 @@ def render_pose_data_capsules(
    starts_t = torch.from_numpy(starts_np).to(device=device, dtype=torch.float32)
    ends_t = torch.from_numpy(ends_np).to(device=device, dtype=torch.float32)
    colors_t = torch.from_numpy(colors_np).to(device=device, dtype=torch.float32)
+    radii_np = np.where(is_hand_np, radius_m * hand_radius_scale, radius_m).astype(np.float32)
+    radii_t = torch.from_numpy(radii_np).to(device=device, dtype=torch.float32)

    return _render_capsules_torch(
        starts_t, ends_t, colors_t,
        H=H, W=W, fx=fx, fy=fy, cx=cx, cy=cy,
-        radius=float(radius_m),
+        radius=radii_t,
        background_rgb=bg_t,
        device=device,
+        flat_shade=flat_shade,
    )
--- a/comfy_extras/sam3d_body/export/glb_openpose.py
+++ b/comfy_extras/sam3d_body/export/glb_openpose.py
@ -37,6 +37,7 @@ from .glb_shared import (
    SCAIL_LIMB_COLORS_17,
    collect_tracks,
    flat_shade_mesh,
+    gaussian_smooth_positions,
    make_lit_material,
    quat_sign_fix_per_joint,
    rotation_align,
@ -364,11 +365,14 @@ def _build_openpose_spheres(
    bind_kp_m: np.ndarray, radius_m: float, kp_colors: np.ndarray,
    base_joint_idx: int = 0,
    smooth_shade: bool = False,
+    joint_indices: Optional[np.ndarray] = None,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """UV sphere per OpenPose keypoint, rigidly skinned to that keypoint's
    joint, vertex-colored from kp_colors. `base_joint_idx` is added to the
    emitted JOINTS_0 indices so callers can place this group at any offset
-    in the shared skin (body=0, right hand=18, etc.).
+    in the shared skin (body=0, right hand=18, etc.). `joint_indices` (when
+    given) overrides that with explicit per-sphere joint indices, so callers
+    can skip keypoints (e.g. SCAIL head dots).

    `smooth_shade=True` keeps the indexed mesh and writes per-vertex
    normals via face-normal averaging — round shading on the spheres.
@ -390,7 +394,7 @@ def _build_openpose_spheres(
        out_v[v_off:v_off + Nv] = sv * radius_m + bind_kp_m[j]
        out_n[v_off:v_off + Nv] = sv
        out_f[j * Nf:(j + 1) * Nf] = sf + v_off
-        out_j[v_off:v_off + Nv, 0] = j + base_joint_idx
+        out_j[v_off:v_off + Nv, 0] = int(joint_indices[j]) if joint_indices is not None else j + base_joint_idx
        out_w[v_off:v_off + Nv, 0] = 1.0
        out_c[v_off:v_off + Nv] = kp_colors[j]
    return _finalize_skinned_mesh(out_v, out_f, out_j, out_w, out_c, smooth_shade)
@ -579,6 +583,24 @@ def _capsule_mesh_local(
    return v_arr, np.asarray(faces, dtype=np.uint32), weights


+def _scail_redirect_neck_stub(body_kp: np.ndarray) -> np.ndarray:
+    """Return a copy of a (..., 18, 3) keypoint array whose nose slot (idx 0)
+    holds a neck-stub tip: half the neck→nose length away from the neck, along
+    the unit blend 0.6 × (mid-hip→neck) + 0.4 × (neck→nose)."""
+    def _normalized(vec: np.ndarray) -> np.ndarray:
+        # Norm is floored at 1e-6 so zero-length vectors divide safely.
+        return vec / np.linalg.norm(vec, axis=-1, keepdims=True).clip(min=1e-6)
+
+    neck_pt, nose_pt = body_kp[..., 1, :], body_kp[..., 0, :]
+    hip_center = 0.5 * (body_kp[..., 8, :] + body_kp[..., 11, :])
+    to_nose = nose_pt - neck_pt
+    stub_len = np.linalg.norm(to_nose, axis=-1, keepdims=True) * 0.5
+    stub_dir = _normalized(0.6 * _normalized(neck_pt - hip_center) + 0.4 * _normalized(to_nose))
+    redirected = body_kp.copy()
+    redirected[..., 0, :] = neck_pt + stub_dir * stub_len
+    return redirected
+
+
 def _openpose_limb_rest_trs(
    bind_kp_m: np.ndarray, pairs: Tuple[Tuple[int, int], ...],
 ) -> Tuple[np.ndarray, np.ndarray]:
@ -636,6 +658,7 @@ def _build_openpose_sticks(
    limb_joint_base_idx: int = 0,
    shape: str = "ellipsoid",
    smooth_shade: bool = False,
+    end_width_frac: float = 0.3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Capsule (cylinder + hemispherical caps) per limb pair (a, b).

@ -682,7 +705,7 @@ def _build_openpose_sticks(
            half_width_eff = max(MIN_WIDTH, min(length * WIDTH_RATIO, half_width_m))

        v_local, f_local, _weights_unused = _capsule_mesh_local(
-            length, half_width_eff, shape=shape,
+            length, half_width_eff, shape=shape, end_width_frac=end_width_frac,
        )
        v_world = v_local @ R.T + head
        Nv = v_local.shape[0]
@ -729,13 +752,15 @@ def build_glb_openpose(
    hand_marker_radius_m: float = 0.0,
    hand_stick_radius_m: float = 0.0,
    hand_color_style: str = "dwpose",
-    face_source: str = "off",
+    face_style: str = "disabled",
    face_marker_radius_m: float = 0.0,
    palette: str = "openpose",
    shape: str = "ellipsoid",
    smooth_shade: bool = False,
    material_roughness: float = 0.85,
    material_double_sided: bool = False,
+    stick_end_width_frac: float = 0.6,
+    bone_smooth_window: int = 0,
 ) -> bytes:
    """Build a GLB containing an OpenPose-style 3D skeleton — sphere markers
    per keypoint plus rainbow-colored sticks between standard limb pairs.
@ -757,9 +782,10 @@ def build_glb_openpose(
            rainbow per-finger sticks (controlnet_aux/dwpose convention);
            'openpose' = rainbow per-finger dots AND sticks (matches
            poseParameters.cpp::HAND_COLORS_RENDER).
-        face_source: 'off' (default) | 'rig' — when 'rig', adds ~30 face
-            contour landmarks sampled from `pred_vertices` at vertex IDs
-            picked from `pose_data["canonical_colors"]["positions"]`.
+        face_style: 'disabled' (default) | 'full' | 'eyes_mouth' — face
+            landmarks sampled from `pred_vertices` at vertex IDs picked from
+            `pose_data["canonical_colors"]["positions"]`. 'full' = all ~30
+            contour points; 'eyes_mouth' = the eyes + outer-lip subset.
        face_marker_radius_m: per-face landmark sphere radius. 0 = auto =
            0.3 × `marker_radius_m` — face landmarks are densely packed
            around the eyes/mouth/jaw and need to be much smaller than
@ -771,6 +797,12 @@ def build_glb_openpose(
            SCAIL-Pose style — warm hues right side, cool hues left side,
            grey neck-to-nose centerline, distinct per-limb colors.
    """
+    is_scail = str(palette) == "scail"
+    # SCAIL drops the face bones (13..16) and eye/ear spheres; keeps nose (idx 0,
+    # the neck-stub tip) to cap the open cylinder. Matches the capsule render.
+    body_pairs = OPENPOSE_18_PAIRS[:13] if is_scail else OPENPOSE_18_PAIRS
+    body_sphere_kp = (np.arange(14, dtype=np.int64)
+                      if is_scail else np.arange(18, dtype=np.int64))
    if str(palette) == "scail":
        body_sphere_colors = SCAIL_KEYPOINT_COLORS_18
        body_stick_colors = SCAIL_LIMB_COLORS_17
@ -805,25 +837,30 @@ def build_glb_openpose(
    if not tracks:
        raise ValueError("build_glb_openpose: no valid tracks in pose_data")

+    # Eyes (6..13) + outer-lip ring (19..22) from FACE_LANDMARK_TARGETS.
+    _EYES_MOUTH_IDX = np.array([6, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22], dtype=np.int64)
    face_vert_ids: Optional[np.ndarray] = None
-    if face_source == "rig":
+    face_target_idx = np.arange(len(FACE_LANDMARK_TARGETS), dtype=np.int64)
+    if face_style in ("full", "eyes_mouth"):
        canonical_colors = pose_data.get("canonical_colors") or {}
        positions = canonical_colors.get("positions")
        if positions is None:
            raise ValueError(
-                "build_glb_openpose: face_source='rig' needs "
+                "build_glb_openpose: face_style needs "
                "pose_data['canonical_colors']['positions'] (computed at "
                "model load and attached by Predict). Ensure the SAM3DBody "
                "Loader+Predict ran upstream of this node."
            )
+        if face_style == "eyes_mouth":
+            face_target_idx = _EYES_MOUTH_IDX
        face_vert_ids = select_face_landmark_vert_ids(
            np.asarray(positions),
            face_mask=canonical_colors.get("face_mask"),
-        )
-    elif face_source != "off":
+        )[face_target_idx]
+    elif face_style != "disabled":
        raise ValueError(
-            f"build_glb_openpose: unknown face_source={face_source!r} "
-            "(expected 'off' or 'rig')"
+            f"build_glb_openpose: unknown face_style={face_style!r} "
+            "(expected 'disabled', 'full', or 'eyes_mouth')"
        )

    K_body = 18
@ -833,7 +870,7 @@ def build_glb_openpose(

    # Limb counts: one joint per stick pair. Limb joints carry translation +
    # rotation so each capsule rotates rigidly with its limb (no LBS thinning).
-    K_body_limbs = len(OPENPOSE_18_PAIRS)
+    K_body_limbs = len(body_pairs)
    K_hand_limbs = len(OPENPOSE_HAND_PAIRS) if include_hands else 0
    K_limbs = K_body_limbs + 2 * K_hand_limbs  # face has no sticks

@ -843,14 +880,14 @@ def build_glb_openpose(
        joint_names.extend([f"openpose_R_{n}" for n in OPENPOSE_HAND21_NAMES])
        joint_names.extend([f"openpose_L_{n}" for n in OPENPOSE_HAND21_NAMES])
    if K_face > 0:
-        joint_names.extend([f"openpose_face_{name}"
-                            for name, _ in FACE_LANDMARK_TARGETS])
+        joint_names.extend([f"openpose_face_{FACE_LANDMARK_TARGETS[i][0]}"
+                            for i in face_target_idx])

    # Limb joint names, stacked body → R-hand → L-hand to match the limb
    # joint ordering in skin.joints (after the K keypoint joints).
    limb_names: List[str] = [
        f"openpose_limb_{OPENPOSE_18_NAMES[a]}_{OPENPOSE_18_NAMES[b]}"
-        for (a, b) in OPENPOSE_18_PAIRS
+        for (a, b) in body_pairs
    ]
    if include_hands:
        for side in ("R", "L"):
@ -882,6 +919,8 @@ def build_glb_openpose(
            seq_chunks.append(_extract_face_landmarks_from_verts(
                pose_data, frame_indices, person_k, face_vert_ids))
        kp_seq = np.concatenate(seq_chunks, axis=1)  # (N, K, 3)
+        if bone_smooth_window and bone_smooth_window > 1:
+            kp_seq = gaussian_smooth_positions(kp_seq, int(bone_smooth_window))

        # Static-bind = rig's REST pose when available (override path); else
        # fall back to frame 0 of the motion. The rest-pose bind makes the
@ -896,6 +935,10 @@ def build_glb_openpose(
        bind_kp_m = (bind_kp_m_rest if bind_kp_m_rest is not None
                     else kp_seq[0].astype(np.float32))

+        if is_scail:  # nose → neck stub, matching the capsule render
+            kp_seq[:, :K_body] = _scail_redirect_neck_stub(kp_seq[:, :K_body])
+            bind_kp_m[:K_body] = _scail_redirect_neck_stub(bind_kp_m[:K_body])
+
        person_root: Dict[str, Any] = {"name": f"track{track_i:02d}", "children": []}
        nodes.append(person_root)
        person_root_idx = len(nodes) - 1
@ -920,8 +963,8 @@ def build_glb_openpose(
        limb_rest_axes_list: List[np.ndarray] = []
        limb_anim_mids_list: List[np.ndarray] = []
        limb_anim_quats_list: List[np.ndarray] = []
-        rmid_b, raxis_b = _openpose_limb_rest_trs(bind_kp_m[:K_body], OPENPOSE_18_PAIRS)
-        amid_b, aquat_b = _openpose_limb_anim_trs(kp_seq[:, :K_body], OPENPOSE_18_PAIRS, raxis_b)
+        rmid_b, raxis_b = _openpose_limb_rest_trs(bind_kp_m[:K_body], body_pairs)
+        amid_b, aquat_b = _openpose_limb_anim_trs(kp_seq[:, :K_body], body_pairs, raxis_b)
        limb_rest_mids_list.append(rmid_b)
        limb_rest_axes_list.append(raxis_b)
        limb_anim_mids_list.append(amid_b)
@ -979,15 +1022,17 @@ def build_glb_openpose(
        group_meshes: List[Tuple[np.ndarray, np.ndarray, np.ndarray,
                                 np.ndarray, np.ndarray, np.ndarray]] = []
        sp = _build_openpose_spheres(
-            bind_kp_m[:K_body], float(marker_radius_m),
-            body_sphere_colors, base_joint_idx=0,
+            bind_kp_m[body_sphere_kp], float(marker_radius_m),
+            body_sphere_colors[body_sphere_kp], base_joint_idx=0,
            smooth_shade=smooth_shade,
+            joint_indices=body_sphere_kp,
        )
        st = _build_openpose_sticks(
-            bind_kp_m[:K_body], OPENPOSE_18_PAIRS, float(stick_radius_m),
+            bind_kp_m[:K_body], body_pairs, float(stick_radius_m),
            body_stick_colors, limb_joint_base_idx=K,  # body limbs start at K
            shape=shape,
            smooth_shade=smooth_shade,
+            end_width_frac=stick_end_width_frac,
        )
        group_meshes.append(sp)
        group_meshes.append(st)
@ -1012,6 +1057,7 @@ def build_glb_openpose(
                    limb_joint_base_idx=K + K_body_limbs + hand_i * K_hand_limbs,
                    shape=shape,
                    smooth_shade=smooth_shade,
+                    end_width_frac=stick_end_width_frac,
                ))

        if K_face > 0:
--- a/comfy_extras/sam3d_body/export/glb_shared.py
+++ b/comfy_extras/sam3d_body/export/glb_shared.py
@ -122,6 +122,30 @@ def gaussian_smooth_quats(q_seq: np.ndarray, window: int) -> np.ndarray:
    return out.astype(np.float32)


+def gaussian_smooth_positions(seq: np.ndarray, window: int) -> np.ndarray:
+    """Temporal Gaussian filter for a (N, K, 3) position track. The time axis
+    is padded by repeating the first/last frame, taps span ±(window // 2) with
+    sigma = max(0.5, window / 4), and the result is float32. A window <= 1 or
+    a track shorter than 2 frames is handed back untouched."""
+    if window <= 1 or seq.shape[0] < 2:
+        return seq
+
+    track = np.asarray(seq, dtype=np.float64)
+    frames, reach = track.shape[0], window // 2
+    spread = max(0.5, window / 4.0)
+    offsets = np.arange(-reach, reach + 1, dtype=np.float64)
+    taps = np.exp(-offsets * offsets / (2.0 * spread * spread))
+    taps /= taps.sum()
+    # Edge-replicate `reach` frames on both ends so every tap has a sample.
+    lead = np.repeat(track[:1], reach, axis=0)
+    tail = np.repeat(track[-1:], reach, axis=0)
+    padded = np.concatenate((lead, track, tail), axis=0)
+    smoothed = np.zeros_like(track)
+    for shift, tap in enumerate(taps):  # tap-by-tap accumulation in float64
+        smoothed += tap * padded[shift:shift + frames]
+    return smoothed.astype(np.float32)
+
+
 def quat_sign_fix_per_joint(q_seq: np.ndarray) -> np.ndarray:
    """Walk (N, NJ, 4) along time, flip sign whenever consecutive frames sit
    on opposite hemispheres. Eliminates long-path slerp glitches (mid-anim
@ -900,19 +924,23 @@ def rotation_align(from_vec: np.ndarray, to_vec: np.ndarray) -> np.ndarray:


 def make_lit_material(
-    roughness: float = 0.85, double_sided: bool = False,
+    roughness: float = 0.85, double_sided: bool = False, opacity: float = 1.0,
 ) -> dict:
    """Lit PBR material using vertex COLOR_0 multiplicatively. KHR_materials_unlit
    is intentionally off so viewer lighting reveals surface form. metallic=0
    keeps the surface dielectric so vertex colors stay readable. roughness=0.85
-    suits dense rainbow body meshes; 0.3 matches SCAIL-Pose's glossy rig look."""
+    suits dense rainbow body meshes; 0.3 matches SCAIL-Pose's glossy rig look.
+    opacity < 1 switches to alpha-blend (e.g. see-through body mesh over bones)."""
+    a = float(max(0.0, min(1.0, opacity)))  # clamp opacity to [0, 1]
    mat = {
        "pbrMetallicRoughness": {
-            "baseColorFactor": [1.0, 1.0, 1.0, 1.0],
+            "baseColorFactor": [1.0, 1.0, 1.0, a],  # white RGB; alpha = clamped opacity
            "metallicFactor": 0.0,
            "roughnessFactor": float(max(0.0, min(1.0, roughness))),
        },
    }
+    if a < 1.0:  # fully opaque materials get no "alphaMode" key at all
+        mat["alphaMode"] = "BLEND"
    if double_sided:
        mat["doubleSided"] = True
    return mat
--- a/comfy_extras/sam3d_body/export/glb_skeletal.py
+++ b/comfy_extras/sam3d_body/export/glb_skeletal.py
@ -362,8 +362,10 @@ def build_glb_skeletal(
                "indices": indices_acc,
                "mode": 4,
            }
-            if color_acc is not None:
-                materials.append(make_lit_material())
+            # See-through body when bones are shown, else opaque (only when a
+            # vertex-color shader baked COLOR_0 — otherwise default material).
+            if color_acc is not None or include_bones:
+                materials.append(make_lit_material(opacity=0.35 if include_bones else 1.0))
                primitive["material"] = len(materials) - 1
            if expr_morph_accs:
                primitive["targets"] = [{"POSITION": a} for a in expr_morph_accs]
--- a/comfy_extras/sam3d_body/utils.py
+++ b/comfy_extras/sam3d_body/utils.py
@ -105,16 +105,85 @@ def cam_int_from_moge(moge_geometry, height: int, width: int) -> Optional[torch.
    )


-def run_batched_single_chunk(
-    inner: SAM3DBody,
-    frames_rgb: List[torch.Tensor],
-    per_frame_boxes: List[torch.Tensor],
-    per_frame_masks: Optional[List[torch.Tensor]],
-    image_size: Tuple[int, int],
-    inference_type: str,
-    K: int,
-    cam_int: Optional[torch.Tensor] = None,
-) -> List[List[Dict[str, Any]]]:
+def apply_camera_override(mhr_pose_data: Dict[str, Any], camera_info: Dict[str, Any],
+                          H: int, W: int, fov_deg: float = 0.0) -> Dict[str, Any]:
+    """Re-project every frame's pose through a Load3D 6DOF camera (position/
+    target/zoom + optional FOV). Returns a new mhr_pose_data (3D points in the
+    new camera frame, 2D keypoints re-projected, pred_cam_t zeroed); the input
+    is returned unchanged on empty/invalid input (no frames, no camera_info, no
+    pred_cam_t in frame 0, or camera position == target)."""
+    first_frame = (mhr_pose_data.get("frames") or [[]])[0]
+    if not first_frame or camera_info is None:
+        return mhr_pose_data
+    # GLB exports the rig root at origin, so Load3D coords are root-relative
+    roots = [np.asarray(p["pred_cam_t"], dtype=np.float32).reshape(3)
+             for p in first_frame if p.get("pred_cam_t") is not None]
+    if not roots:
+        return mhr_pose_data
+    subj_center = np.mean(np.stack(roots, axis=0), axis=0)
+
+    # Meter-scale, so Three.js coords map 1:1 (Three.js Y-up → flip Y,Z). NOTE(review): position.y alone defaults to 5.0 — confirm.
+    pos = camera_info.get("position") or {}
+    tgt = camera_info.get("target") or {}
+    pos_v = np.array([float(pos.get("x", 0.0)), -float(pos.get("y", 5.0)), -float(pos.get("z", 0.0))], dtype=np.float32)
+    tgt_v = np.array([float(tgt.get("x", 0.0)), -float(tgt.get("y", 0.0)), -float(tgt.get("z", 0.0))], dtype=np.float32)
+    offset = pos_v - tgt_v
+    dist = float(np.linalg.norm(offset))
+    if dist < 1e-6:
+        return mhr_pose_data
+
+    zoom = float(camera_info.get("zoom") or 1.0) or 1.0  # null / missing / 0 → 1.0
+    eye = subj_center + tgt_v + offset / max(0.01, zoom)
+
+    # Look-at basis. z = -offset (already non-zero); x degenerates only when
+    # looking straight along world-up, then fall back to world +X.
+    z_axis = -offset / dist
+    x_axis = np.cross(z_axis, np.array([0.0, -1.0, 0.0], dtype=np.float32))
+    x_norm = float(np.linalg.norm(x_axis))
+    x_axis = x_axis / x_norm if x_norm > 1e-6 else np.array([1.0, 0.0, 0.0], dtype=np.float32)
+    y_axis = np.cross(z_axis, x_axis)
+    R = np.stack([x_axis, y_axis, z_axis], axis=0).astype(np.float32)
+
+    # fov_deg > 0 overrides the lens; 0 keeps the SAM3D predicted focal so only
+    # the viewpoint changes. Three.js fov is vertical → focal from image height.
+    if fov_deg > 0:
+        new_focal = float(H) / (2.0 * float(np.tan(np.deg2rad(fov_deg) / 2.0)))
+    else:
+        f0 = first_frame[0].get("focal_length")  # no predicted focal → 50° vertical FOV
+        new_focal = (float(np.asarray(f0, dtype=np.float32).reshape(-1)[0]) if f0 is not None
+                     else float(H) / (2.0 * float(np.tan(np.deg2rad(50.0) / 2.0))))
+
+    center = np.array([W * 0.5, H * 0.5], dtype=np.float32)
+    reproj = {"pred_keypoints_3d": "pred_keypoints_2d", "pred_face_keypoints_3d": "pred_face_keypoints_2d"}
+    new_frames: List[List[Dict[str, Any]]] = []
+    for frame in mhr_pose_data["frames"]:
+        people = []
+        for p in frame:
+            p = dict(p)  # shallow copy so the caller's per-person dicts stay untouched
+            cam_t = p.get("pred_cam_t")
+            if cam_t is None:  # nothing to anchor the transform to → pass through as-is
+                people.append(p)
+                continue
+            cam_t = np.asarray(cam_t, dtype=np.float32).reshape(3)
+            for k in ("pred_keypoints_3d", "pred_vertices", "pred_face_keypoints_3d"):
+                v = p.get(k)
+                if v is None:
+                    continue
+                cam = (np.asarray(v, dtype=np.float32) + cam_t - eye) @ R.T
+                p[k] = cam.astype(np.float32)
+                if k in reproj:  # re-project the new 3D to 2D image coords
+                    z = np.maximum(cam[..., 2:3], 1e-6)  # depth floored to avoid divide-by-zero
+                    p[reproj[k]] = (cam[..., :2] * new_focal / z + center).astype(np.float32)
+            p["pred_cam_t"] = np.zeros(3, dtype=np.float32)  # translation is now baked into the points
+            p["focal_length"] = np.array(new_focal, dtype=np.float32)
+            people.append(p)
+        new_frames.append(people)
+    return {**mhr_pose_data, "frames": new_frames}
+
+
+def run_batched_single_chunk(inner: SAM3DBody, frames_rgb: List[torch.Tensor], per_frame_boxes: List[torch.Tensor],
+    per_frame_masks: Optional[List[torch.Tensor]], image_size: Tuple[int, int], inference_type: str, K: int,
+    cam_int: Optional[torch.Tensor] = None) -> List[List[Dict[str, Any]]]:
    """Run a SINGLE chunk of frames through run_inference in one forward."""
    N = len(frames_rgb)
    total = N * K