mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-07-03 21:20:49 +08:00
Cleanup Pixal3DConditioning
This commit is contained in:
parent
f4b2173cf2
commit
2333d6bc40
@ -2,6 +2,7 @@
|
||||
|
||||
|
||||
import torch
|
||||
import math
|
||||
|
||||
import comfy.utils
|
||||
import folder_paths
|
||||
@ -391,6 +392,53 @@ class MoGePointMapToMesh(io.ComfyNode):
|
||||
return io.NodeOutput(mesh)
|
||||
|
||||
|
||||
class MoGeGeometryToFOV(io.ComfyNode):
    """Extract horizontal/vertical FOV from MoGe intrinsics, e.g. fov_y to feed SAM3DBody_Predict.

    The node outputs the field of view along the selected axis (in the selected
    unit) and a focal length in pixels computed from the vertical focal term.
    """

    @classmethod
    def define_schema(cls):
        """Declare the node: a geometry input, axis/unit combos, and two float outputs."""
        return io.Schema(
            node_id="MoGeGeometryToFOV",
            search_aliases=["moge", "fov", "geometry", "intrinsics", "field of view"],
            display_name="Get FoV from MoGe Geometry",
            description="Derive the field of view and focal length from MoGe intrinsics.",
            category="image/geometry estimation",
            inputs=[
                MoGeGeometry.Input("moge_geometry"),
                io.Combo.Input("axis", options=["vertical", "horizontal", "diagonal"], default="vertical",
                               tooltip="'vertical' (fov_y), 'horizontal' (fov_x), or 'diagonal'."),
                io.Combo.Input("unit", options=["degrees", "radians"], default="degrees",
                               tooltip="Output unit for the FOV."),
            ],
            outputs=[
                io.Float.Output(display_name="fov"),
                io.Float.Output(display_name="focal_pixels"),
            ],
        )

    @classmethod
    def execute(cls, moge_geometry, axis, unit) -> io.NodeOutput:
        """Compute the FOV along ``axis`` and the focal length in pixels.

        Args:
            moge_geometry: Geometry dict; must hold "intrinsics" and at least one
                non-None entry among "image", "points" and "depth".
            axis: "vertical", "horizontal" or "diagonal".
            unit: "radians" returns radians; any other value returns degrees.

        Returns:
            ``io.NodeOutput(fov, focal_pixels)``.

        Raises:
            ValueError: If the intrinsics are missing, a focal term is not a
                finite positive number, or no tensor is available to read the
                pixel height from.
        """
        K = moge_geometry.get("intrinsics") if isinstance(moge_geometry, dict) else None
        if K is None:
            raise ValueError("moge_geometry has no intrinsics (panorama geometry has none).")
        if K.ndim == 3:
            # Batched intrinsics: only the first matrix is used.
            K = K[0]

        fx = float(K[0, 0].item())
        fy = float(K[1, 1].item())
        # Fail with a clear message instead of a bare ZeroDivisionError (zero focal)
        # or a silently propagated NaN/negative FOV.
        if not (math.isfinite(fx) and math.isfinite(fy) and fx > 0.0 and fy > 0.0):
            raise ValueError(f"moge_geometry intrinsics have an invalid focal length (fx={fx}, fy={fy}).")

        # MoGe normalizes fx by width and fy by height; with cx=cy=0.5 the half-extent
        # in normalized units is 0.5, so fov = 2*atan(0.5 / f) per axis (hypot for diagonal).
        hx = 0.5 / fx
        hy = 0.5 / fy
        half_tan = {"horizontal": hx, "vertical": hy, "diagonal": math.hypot(hx, hy)}[axis]
        fov_radians = 2.0 * math.atan(half_tan)
        fov = fov_radians if unit == "radians" else math.degrees(fov_radians)

        # Pixels are square here, so fy*H == fx*W is the single lens focal in pixels.
        # Skip keys that are present but hold None so a later key can still supply the size.
        candidates = (moge_geometry.get(k) for k in ("image", "points", "depth"))
        src = next((value for value in candidates if value is not None), None)
        if src is None:
            raise ValueError("moge_geometry has no image/points/depth to read the pixel height from.")
        # NOTE(review): assumes dim 1 of image/points/depth is the pixel height - confirm.
        H = int(src.shape[1])
        focal_pixels = fy * H
        return io.NodeOutput(fov, focal_pixels)
|
||||
|
||||
|
||||
class MoGeMaskOut(io.ComfyNode):
|
||||
"""Mark masked pixels as invalid in a MoGe geometry. MoGePointMapToMesh's
|
||||
finite-check then drops them during triangulation, so the scene mesh has a
|
||||
@ -466,7 +514,7 @@ class MoGeMaskOut(io.ComfyNode):
|
||||
class MoGeExtension(ComfyExtension):
    """Extension object that lists the MoGe node classes defined in this module."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return every MoGe node class, including MoGeGeometryToFOV.

        A single return is kept: the earlier duplicate return omitted
        MoGeGeometryToFOV and made the complete list unreachable.
        """
        return [
            LoadMoGeModel,
            MoGeInference,
            MoGePanoramaInference,
            MoGeRender,
            MoGePointMapToMesh,
            MoGeGeometryToFOV,
            MoGeMaskOut,
        ]
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> MoGeExtension:
|
||||
|
||||
@ -845,11 +845,6 @@ def _crop_image_with_mask(item_image, item_mask, max_image_size=1024):
|
||||
composite = (composite * 255.0).round().clamp(0, 255).to(torch.uint8).float() / 255.0
|
||||
return composite, crop_bbox, scene_size
|
||||
|
||||
def _fov_from_moge_intrinsics(moge_intrinsics: torch.Tensor) -> float:
|
||||
fx = moge_intrinsics[..., 0, 0].float()
|
||||
fov = 2.0 * torch.atan(0.5 / fx.clamp(min=1e-4))
|
||||
return float(fov.mean().item())
|
||||
|
||||
class Pixal3DConditioning(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
@ -862,18 +857,11 @@ class Pixal3DConditioning(IO.ComfyNode):
|
||||
IO.Image.Input("image"),
|
||||
IO.Mask.Input("mask"),
|
||||
IO.Float.Input(
|
||||
"camera_angle_x", default=0.2, min=0.0175, max=2.9671, step=0.001,
|
||||
tooltip="Horizontal FOV in radians (upstream demo default 0.2). "
|
||||
"Overridden by moge_geometry if connected.",
|
||||
),
|
||||
IO.Float.Input(
|
||||
"mesh_scale", default=1.0, min=0.1, max=4.0, step=0.01,
|
||||
tooltip="Mesh scale; 1.0 means unit cube.",
|
||||
),
|
||||
io.Custom("MOGE_GEOMETRY").Input(
|
||||
"moge_geometry",
|
||||
optional=True,
|
||||
tooltip="If connected, camera_angle_x is recovered from MoGe.",
|
||||
"camera_angle_x", display_name="fov",
|
||||
default=11.46, min=1.0, max=170.0, step=0.01, advanced=True,
|
||||
tooltip="Horizontal FOV in degrees (original default ~11.46° = 0.2 rad). "
|
||||
"Wire a MoGeGeometryToFOV (axis='horizontal', unit='degrees') "
|
||||
"output here for a MoGe-derived FOV.",
|
||||
),
|
||||
NAFModel.Input(
|
||||
"naf_model",
|
||||
@ -889,8 +877,7 @@ class Pixal3DConditioning(IO.ComfyNode):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, clip_vision_model, image, mask, camera_angle_x, mesh_scale,
|
||||
moge_geometry=None, naf_model=None) -> IO.NodeOutput:
|
||||
def execute(cls, clip_vision_model, image, mask, camera_angle_x, naf_model=None) -> IO.NodeOutput:
|
||||
if image.ndim == 3:
|
||||
image = image.unsqueeze(0)
|
||||
if mask.ndim == 2:
|
||||
@ -901,9 +888,6 @@ class Pixal3DConditioning(IO.ComfyNode):
|
||||
elif mask.shape[0] != batch_size:
|
||||
raise ValueError(f"Pixal3DConditioning mask batch {mask.shape[0]} != image batch {batch_size}")
|
||||
|
||||
if moge_geometry is not None and "intrinsics" in moge_geometry:
|
||||
camera_angle_x = _fov_from_moge_intrinsics(moge_geometry["intrinsics"])
|
||||
|
||||
device = comfy.model_management.intermediate_device()
|
||||
|
||||
cond_512_list, cond_1024_list = [], []
|
||||
@ -954,11 +938,12 @@ class Pixal3DConditioning(IO.ComfyNode):
|
||||
hr_tex_1024 = _naf_hr(fm_1024_dino, composite_list, 1024, (1024, 1024))
|
||||
|
||||
# distance_from_fov: grid_point (-1, 0, 0) projects to pixel (0, image_resolution-1).
|
||||
camera_angle_x = float(camera_angle_x)
|
||||
distance = 0.5 / math.tan(camera_angle_x / 2.0) / float(mesh_scale)
|
||||
# FOV widget is in degrees for UX; trig + downstream projection expect radians.
|
||||
camera_angle_x = math.radians(float(camera_angle_x))
|
||||
distance = 0.5 / math.tan(camera_angle_x / 2.0)
|
||||
cam_angle_t = torch.tensor([camera_angle_x] * batch_size, device=device, dtype=torch.float32)
|
||||
dist_t = torch.tensor([distance] * batch_size, device=device, dtype=torch.float32)
|
||||
scale_t = torch.tensor([float(mesh_scale)] * batch_size, device=device, dtype=torch.float32)
|
||||
scale_t = torch.ones(batch_size, device=device, dtype=torch.float32)
|
||||
T = build_proj_transform_matrix(dist_t, batch_size, device=device, dtype=torch.float32)
|
||||
|
||||
proj_pack = {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user