mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-04-15 04:52:31 +08:00
Refactor EGL device enumeration and fix eglGetDisplay fallback
- Extract device enumeration into _egl_device_display() helper
- Use ctypes.util.find_library("EGL") instead of hardcoded libEGL.so.1
- Make an eglGetDisplay(EGL_DEFAULT_DISPLAY) failure also fall through to
device enumeration (previously it raised immediately, skipping the fallback)
- Two-pass eglQueryDevicesEXT to avoid arbitrary device count cap
This commit is contained in:
parent
8e0558c4a4
commit
e24d0f0ad1
@ -197,6 +197,65 @@ def _init_glfw():
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def _egl_device_display(eglInitialize):
    """Obtain an EGLDisplay via EGL_EXT_platform_device for headless GPU rendering.

    EGL_DEFAULT_DISPLAY fails on headless NVIDIA (EGL_BAD_ACCESS) because there is
    no X/Wayland compositor. The correct approach is to enumerate EGL devices and
    obtain a display from a specific device handle using EGL_EXT_platform_device.

    PyOpenGL's egl_get_devices() wrapper does not reliably resolve eglQueryDevicesEXT
    in this scenario, so both extension functions are loaded from libEGL directly via
    ctypes.

    Every enumerated device is tried in order and the first one whose display
    initializes successfully is used, so a device that cannot be opened does not
    prevent a later, working device from being selected.

    Args:
        eglInitialize: Callable invoked as ``eglInitialize(display, major, minor)``.
            It may signal failure either by returning a falsy value or by raising;
            both are treated as "this device is unusable".

    Returns:
        An initialized ``(display, major, minor)`` tuple. ``display`` is a
        ``ctypes.c_void_p``; ``major`` and ``minor`` are the ``ctypes.c_int32``
        objects that were passed to ``eglInitialize``.

    Raises:
        RuntimeError: If libEGL cannot be located or loaded, a required extension
            entry point is missing, no devices are reported, or no device yields
            an initialized display.
    """
    # "import ctypes" alone does not guarantee the util submodule is loaded.
    import ctypes.util

    logger.debug("_egl_device_display: starting")

    libegl_name = ctypes.util.find_library("EGL")
    if not libegl_name:
        raise RuntimeError("libEGL not found")
    try:
        _libegl = ctypes.CDLL(libegl_name)
    except OSError as exc:
        # Keep the failure type consistent with every other error path here.
        raise RuntimeError(f"failed to load {libegl_name}: {exc}") from exc
    _get_proc = _libegl.eglGetProcAddress
    _get_proc.restype = ctypes.c_void_p
    _get_proc.argtypes = [ctypes.c_char_p]

    _query_devices_ptr = _get_proc(b"eglQueryDevicesEXT")
    if not _query_devices_ptr:
        raise RuntimeError("eglQueryDevicesEXT not available — install libnvidia-egl-gbm1 or libegl-mesa0")
    # EGLBoolean is an unsigned int, not a 1-byte C bool.
    _query_devices = ctypes.CFUNCTYPE(
        ctypes.c_uint,
        ctypes.c_int32, ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_int32),
    )(_query_devices_ptr)

    _get_platform_display_ptr = _get_proc(b"eglGetPlatformDisplayEXT")
    if not _get_platform_display_ptr:
        raise RuntimeError("eglGetPlatformDisplayEXT not available in libEGL")
    _get_platform_display = ctypes.CFUNCTYPE(
        ctypes.c_void_p, ctypes.c_uint32, ctypes.c_void_p, ctypes.c_void_p,
    )(_get_platform_display_ptr)

    # Two-pass query: first get the count, then fetch all devices.
    count = ctypes.c_int32(0)
    if not _query_devices(0, None, ctypes.byref(count)) or count.value <= 0:
        raise RuntimeError("eglQueryDevicesEXT() found no EGL devices")
    raw_devices = (ctypes.c_void_p * count.value)()
    if not _query_devices(len(raw_devices), raw_devices, ctypes.byref(count)):
        raise RuntimeError("eglQueryDevicesEXT() failed on second call")
    # The second call reports how many entries were actually written; never read
    # past the buffer we allocated.
    num_devices = min(count.value, len(raw_devices))
    if num_devices <= 0:
        raise RuntimeError("eglQueryDevicesEXT() found no EGL devices")
    logger.debug(f"_egl_device_display: found {num_devices} EGL device(s)")

    EGL_PLATFORM_DEVICE_EXT = 0x313F
    failure = "eglGetPlatformDisplayEXT() returned NULL"
    cause = None
    for index in range(num_devices):
        raw_display = _get_platform_display(EGL_PLATFORM_DEVICE_EXT, raw_devices[index], None)
        if not raw_display:
            failure, cause = "eglGetPlatformDisplayEXT() returned NULL", None
            logger.debug(f"_egl_device_display: device {index}: {failure}")
            continue
        # Cast the raw pointer to the opaque EGLDisplay type (c_void_p) that PyOpenGL uses.
        display = ctypes.c_void_p(raw_display)

        major, minor = ctypes.c_int32(0), ctypes.c_int32(0)
        try:
            initialized = eglInitialize(display, major, minor)
        except Exception as exc:
            # eglInitialize may raise instead of returning False; treat both the
            # same way and move on to the next device.
            initialized, cause = False, exc
        else:
            cause = None
        if not initialized:
            failure = "eglInitialize() failed on device display"
            logger.debug(f"_egl_device_display: device {index}: {failure}")
            continue

        logger.debug(f"_egl_device_display: EGL version {major.value}.{minor.value}")
        return display, major, minor

    raise RuntimeError(failure) from cause
|
||||||
|
|
||||||
|
|
||||||
def _init_egl():
|
def _init_egl():
|
||||||
"""Initialize EGL for headless rendering. Returns (display, context, surface, EGL_module). Raises RuntimeError on failure."""
|
"""Initialize EGL for headless rendering. Returns (display, context, surface, EGL_module). Raises RuntimeError on failure."""
|
||||||
logger.debug("_init_egl: starting")
|
logger.debug("_init_egl: starting")
|
||||||
@ -217,71 +276,24 @@ def _init_egl():
|
|||||||
surface = None
|
surface = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.debug("_init_egl: calling eglGetDisplay()")
|
# Try EGL_DEFAULT_DISPLAY first (works when a display server is present).
|
||||||
display = eglGetDisplay(EGL_DEFAULT_DISPLAY)
|
# Fall back to device enumeration for headless setups (e.g. NVIDIA with no
|
||||||
if display == _EGL.EGL_NO_DISPLAY:
|
# X/Wayland). Both eglGetDisplay failure and eglInitialize failure (which may
|
||||||
raise RuntimeError("eglGetDisplay() failed")
|
# return False or raise EGLError depending on PyOpenGL version) trigger the
|
||||||
|
# fallback.
|
||||||
logger.debug("_init_egl: calling eglInitialize()")
|
|
||||||
major, minor = _EGL.EGLint(), _EGL.EGLint()
|
major, minor = _EGL.EGLint(), _EGL.EGLint()
|
||||||
# eglInitialize may return False or raise EGLError depending on the PyOpenGL
|
default_ok = False
|
||||||
# version and EGL vendor. Catch both so we can fall through to device enumeration.
|
|
||||||
default_display_ok = False
|
|
||||||
try:
|
try:
|
||||||
if eglInitialize(display, major, minor):
|
display = eglGetDisplay(EGL_DEFAULT_DISPLAY)
|
||||||
default_display_ok = True
|
if display != _EGL.EGL_NO_DISPLAY and eglInitialize(display, major, minor):
|
||||||
|
default_ok = True
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if not default_display_ok:
|
if not default_ok:
|
||||||
# EGL_DEFAULT_DISPLAY fails on headless NVIDIA (EGL_BAD_ACCESS) because
|
|
||||||
# there is no X/Wayland compositor to back it. The correct approach for
|
|
||||||
# headless GPU rendering is to enumerate EGL devices and obtain a display
|
|
||||||
# from a specific device using EGL_EXT_platform_device.
|
|
||||||
#
|
|
||||||
# PyOpenGL's egl_get_devices() wrapper does not reliably resolve the
|
|
||||||
# eglQueryDevicesEXT function pointer in this scenario, so we call
|
|
||||||
# libEGL directly via ctypes instead.
|
|
||||||
display = None # Not initialized, don't terminate
|
display = None # Not initialized, don't terminate
|
||||||
logger.debug("_init_egl: EGL_DEFAULT_DISPLAY failed, falling back to EGL device enumeration")
|
logger.debug("_init_egl: EGL_DEFAULT_DISPLAY failed, falling back to EGL device enumeration")
|
||||||
|
display, major, minor = _egl_device_display(eglInitialize)
|
||||||
_libegl = ctypes.CDLL("libEGL.so.1")
|
|
||||||
_get_proc = _libegl.eglGetProcAddress
|
|
||||||
_get_proc.restype = ctypes.c_void_p
|
|
||||||
_get_proc.argtypes = [ctypes.c_char_p]
|
|
||||||
|
|
||||||
_query_devices_ptr = _get_proc(b"eglQueryDevicesEXT")
|
|
||||||
if not _query_devices_ptr:
|
|
||||||
raise RuntimeError("eglQueryDevicesEXT not available — install libnvidia-egl-gbm1 or libegl-mesa0")
|
|
||||||
_query_devices = ctypes.CFUNCTYPE(
|
|
||||||
ctypes.c_bool,
|
|
||||||
ctypes.c_int32, ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_int32),
|
|
||||||
)(_query_devices_ptr)
|
|
||||||
|
|
||||||
_get_platform_display_ptr = _get_proc(b"eglGetPlatformDisplayEXT")
|
|
||||||
if not _get_platform_display_ptr:
|
|
||||||
raise RuntimeError("eglGetPlatformDisplayEXT not available in libEGL")
|
|
||||||
_get_platform_display = ctypes.CFUNCTYPE(
|
|
||||||
ctypes.c_void_p, ctypes.c_uint32, ctypes.c_void_p, ctypes.c_void_p,
|
|
||||||
)(_get_platform_display_ptr)
|
|
||||||
|
|
||||||
max_devices = 4
|
|
||||||
raw_devices = (ctypes.c_void_p * max_devices)()
|
|
||||||
count = ctypes.c_int32(0)
|
|
||||||
if not _query_devices(max_devices, raw_devices, ctypes.byref(count)) or count.value == 0:
|
|
||||||
raise RuntimeError("eglQueryDevicesEXT() found no EGL devices")
|
|
||||||
logger.debug(f"_init_egl: found {count.value} EGL device(s)")
|
|
||||||
|
|
||||||
EGL_PLATFORM_DEVICE_EXT = 0x313F
|
|
||||||
raw_display = _get_platform_display(EGL_PLATFORM_DEVICE_EXT, raw_devices[0], None)
|
|
||||||
if not raw_display:
|
|
||||||
raise RuntimeError("eglGetPlatformDisplayEXT() returned NULL")
|
|
||||||
# Cast the raw pointer to the opaque EGLDisplay type that PyOpenGL uses
|
|
||||||
# (c_void_p, same as EGL_NO_DISPLAY) so downstream EGL calls accept it.
|
|
||||||
display = ctypes.c_void_p(raw_display)
|
|
||||||
if not eglInitialize(display, major, minor):
|
|
||||||
display = None
|
|
||||||
raise RuntimeError("eglInitialize() failed on device display")
|
|
||||||
logger.debug(f"_init_egl: EGL version {major.value}.{minor.value}")
|
logger.debug(f"_init_egl: EGL version {major.value}.{minor.value}")
|
||||||
|
|
||||||
config_attribs = [
|
config_attribs = [
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user