ComfyUI/comfy_extras/nodes_glsl.py

1190 lines
47 KiB
Python

import os
import sys
import re
import logging
import ctypes.util
import importlib.util
from typing import Type, TypedDict
import numpy as np
import torch
import nodes
from comfy_api.latest import ComfyExtension, io, ui
from typing_extensions import override
from utils.install_util import get_missing_requirements_message
logger = logging.getLogger(__name__)
# OpenGL modules - initialized lazily when context is created
gl = None
def _check_opengl_availability():
"""Early check for OpenGL availability. Raises RuntimeError if unlikely to work."""
logger.debug("_check_opengl_availability: starting")
missing = []
# Check Python packages (using find_spec to avoid importing)
logger.debug("_check_opengl_availability: checking for glfw package")
if importlib.util.find_spec("glfw") is None:
missing.append("glfw")
logger.debug("_check_opengl_availability: checking for OpenGL package")
if importlib.util.find_spec("OpenGL") is None:
missing.append("PyOpenGL")
if missing:
raise RuntimeError(
f"OpenGL dependencies not available.\n{get_missing_requirements_message()}\n"
)
# On Linux without display, check if headless backends are available
logger.debug(f"_check_opengl_availability: platform={sys.platform}")
if sys.platform.startswith("linux"):
has_display = os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY")
logger.debug(f"_check_opengl_availability: has_display={bool(has_display)}")
if not has_display:
# Check for EGL or OSMesa libraries
logger.debug("_check_opengl_availability: checking for EGL library")
has_egl = ctypes.util.find_library("EGL")
logger.debug("_check_opengl_availability: checking for OSMesa library")
has_osmesa = ctypes.util.find_library("OSMesa")
# Error disabled for CI as it fails this check
# if not has_egl and not has_osmesa:
# raise RuntimeError(
# "GLSL Shader node: No display and no headless backend (EGL/OSMesa) found.\n"
# "See error below for installation instructions."
# )
logger.debug(f"Headless mode: EGL={'yes' if has_egl else 'no'}, OSMesa={'yes' if has_osmesa else 'no'}")
logger.debug("_check_opengl_availability: completed")
# Run early check at import time
logger.debug("nodes_glsl: running _check_opengl_availability at import time")
_check_opengl_availability()
class SizeModeInput(TypedDict):
size_mode: str
width: int
height: int
MAX_IMAGES = 5 # u_image0-4
MAX_UNIFORMS = 5 # u_float0-4, u_int0-4
MAX_OUTPUTS = 4 # fragColor0-3 (MRT)
# Vertex shader using gl_VertexID trick - no VBO needed.
# Draws a single triangle that covers the entire screen:
#
# (-1,3)
# /|
# / | <- visible area is the unit square from (-1,-1) to (1,1)
# / | parts outside get clipped away
# (-1,-1)---(3,-1)
#
# v_texCoord is computed from clip space: * 0.5 + 0.5 maps (-1,1) -> (0,1)
VERTEX_SHADER = """#version 330 core
out vec2 v_texCoord;
void main() {
vec2 verts[3] = vec2[](vec2(-1, -1), vec2(3, -1), vec2(-1, 3));
v_texCoord = verts[gl_VertexID] * 0.5 + 0.5;
gl_Position = vec4(verts[gl_VertexID], 0, 1);
}
"""
DEFAULT_FRAGMENT_SHADER = """#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
in vec2 v_texCoord;
layout(location = 0) out vec4 fragColor0;
void main() {
fragColor0 = texture(u_image0, v_texCoord);
}
"""
def _convert_es_to_desktop(source: str) -> str:
"""Convert GLSL ES (WebGL) shader source to desktop GLSL 330 core."""
# Remove any existing #version directive
source = re.sub(r"#version\s+\d+(\s+es)?\s*\n?", "", source, flags=re.IGNORECASE)
# Remove precision qualifiers (not needed in desktop GLSL)
source = re.sub(r"precision\s+(lowp|mediump|highp)\s+\w+\s*;\s*\n?", "", source)
# Prepend desktop GLSL version
return "#version 330 core\n" + source
def _detect_output_count(source: str) -> int:
"""Detect how many fragColor outputs are used in the shader.
Returns the count of outputs needed (1 to MAX_OUTPUTS).
"""
matches = re.findall(r"fragColor(\d+)", source)
if not matches:
return 1 # Default to 1 output if none found
max_index = max(int(m) for m in matches)
return min(max_index + 1, MAX_OUTPUTS)
def _detect_pass_count(source: str) -> int:
"""Detect multi-pass rendering from #pragma passes N directive.
Returns the number of passes (1 if not specified).
"""
match = re.search(r'#pragma\s+passes\s+(\d+)', source)
if match:
return max(1, int(match.group(1)))
return 1
############################################################
class GLContext:
"""Manages OpenGL context and resources for shader execution.
Acts as a singleton factory: ``GLContext`` itself is an "abstract" class (not a true ABC though) and never instantiates itself directly. Instead, its instance is always one of "concrete backend" contexts - a first valid subclass in the fallback sequence. ``GLContext`` doesn't inherit from ABC just to prevent IDE warnings caused by this polymorphism. For all intents and purposes, it **IS** a singleton-ABC.
Backends fallback order: GLFW (desktop) → EGL (headless GPU) → OSMesa (software). See ``__subclass_fallback_order()``.
"""
__instance: 'GLContext' = None # The singleton
def __new__(cls):
# Since ``GLContext`` is a singleton anyway, we should store it
# explicitly in ``GLContext.__instance``, NOT in ``cls.__instance``.
if GLContext.__instance is None:
GLContext.__instance = GLContext.__new_instance_using_concrete_class_fallback_order()
assert isinstance(GLContext.__instance, GLContext)
return GLContext.__instance
@staticmethod
def __concrete_class_fallback_order() -> tuple[Type['GLContext'], ...]:
"""The order concrete subclasses are tried in: GLFW → EGL → OSMesa."""
return _GLContextGLFW, _GLContextEGL, _GLContextOSMesa
@staticmethod
def __new_instance_using_concrete_class_fallback_order() -> 'GLContext':
"""Try to init backends in the fallback order.
Called from ``__new__()`` on first attempt to instantiate the singleton.
Raises RuntimeError if none of the backends work.
"""
errors: list[tuple[str, Exception]] = []
for cls in GLContext.__concrete_class_fallback_order():
name = cls.backend_name()
logger.debug(f"GLContext.__init__: trying {name} backend")
try:
instance: GLContext = object.__new__(cls)
# Since this code is called while in `__new__()`, we need to manually call `__init__()`, too.
# Otherwise, Python would call it only AFTER `__new__()`, causing init errors outside our try-except check.
instance.__init__()
logger.debug(f"GLContext.__init__: {name} backend succeeded. The singleton is: {cls!r}")
logger.info(f"Concrete GLSL context initialized as: {name}")
return instance
except Exception as e:
logger.debug(f"GLContext.__init__: {name} backend failed: {e}")
errors.append((name, e))
# If we still haven't returned, none of the backends succeeded.
# Let's raise the error.
if sys.platform == "win32":
platform_help = (
"Windows: Ensure GPU drivers are installed and display is available.\n"
" CPU-only/headless mode is not supported on Windows."
)
elif sys.platform == "darwin":
platform_help = (
"macOS: GLFW is not supported.\n"
" Install OSMesa via Homebrew: brew install mesa\n"
" Then: pip install PyOpenGL PyOpenGL-accelerate"
)
else:
platform_help = (
"Linux: Install one of these backends:\n"
" Desktop: sudo apt install libgl1-mesa-glx libglfw3\n"
" Headless with GPU: sudo apt install libegl1-mesa libgl1-mesa-dri\n"
" Headless (CPU): sudo apt install libosmesa6"
)
error_details = "\n".join(f" {name}: {err}" for name, err in errors)
raise RuntimeError(
f"Failed to create OpenGL context.\n\n"
f"Backend errors:\n{error_details}\n\n"
f"{platform_help}"
)
def __init__(self):
try:
if self.__initialized:
# 99% of the time (after first init) we get here and just return
logger.debug("GLContext.__init__: already initialized, skipping")
return
logger.warning("GLContext.__init__: weird state: the singleton has <__initialized> attribute, but is NOT initialized.")
except AttributeError:
# First instance creation: it was created with `__new__()`, but hasn't been initialized yet
pass
logger.debug("GLContext.__init__: starting initialization")
self.__initialized: bool = False
self._vao = None
import time
start_time: float = time.perf_counter()
self._init_backend_concrete() # must fully initialize backend
# Now import OpenGL.GL (after context is current)
logger.debug("GLContext.__init__: importing OpenGL.GL")
self.__import_opengl()
gl = self._gl
# Create VAO (required for core profile, but OSMesa may use compat profile)
logger.debug("GLContext.__init__: creating VAO")
try:
vao = gl.glGenVertexArrays(1)
gl.glBindVertexArray(vao)
self._vao = vao # Only store after successful bind
logger.debug("GLContext.__init__: VAO created successfully")
except Exception as e:
logger.debug(f"GLContext.__init__: VAO creation failed (may be expected for OSMesa): {e}")
# OSMesa with older Mesa may not support VAOs
# Clean up if we created but couldn't bind
if vao:
try:
gl.glDeleteVertexArrays(1, [vao])
except Exception:
pass
self.__initialized = True
self._glBindVertexArray = gl.glBindVertexArray
elapsed = (time.perf_counter() - start_time) * 1000
# Log device info
def gl_string(value) -> str:
string = gl.glGetString(value)
return string.decode() if string else "Unknown"
renderer, vendor, version = (
gl_string(x) for x in [gl.GL_RENDERER, gl.GL_VENDOR, gl.GL_VERSION]
)
logger.info(f"GLSL context initialized in {elapsed:.1f}ms ({self.backend_name()}) - {renderer} ({vendor}), GL {version}")
def __import_opengl(self):
"""Import OpenGL module. Called after context is created."""
global gl
if gl is not None:
return
logger.debug("__import_opengl: importing OpenGL.GL")
import OpenGL.GL as _gl
gl = _gl
self._gl = _gl
logger.debug("__import_opengl: import completed")
@classmethod
def backend_name(cls) -> str:
"""Per-concrete-class unique string identifier. Used for log messages."""
raise NotImplementedError("Must be implemented in a concrete subclass.")
def _init_backend_concrete(self):
"""Actual initialisation hook of a concrete backend. Called mid-init."""
raise NotImplementedError("Must be implemented in a concrete subclass.")
def _make_current_concrete(self):
raise NotImplementedError("Must be implemented in a concrete subclass.")
def make_current(self):
self._make_current_concrete()
if self._vao is not None:
self._glBindVertexArray(self._vao)
def compile_shader(self, source: str, shader_type: int) -> int:
"""Compile a shader and return its ID."""
gl = self._gl
shader = gl.glCreateShader(shader_type)
gl.glShaderSource(shader, source)
gl.glCompileShader(shader)
if gl.glGetShaderiv(shader, gl.GL_COMPILE_STATUS) != gl.GL_TRUE:
error = gl.glGetShaderInfoLog(shader).decode()
gl.glDeleteShader(shader)
raise RuntimeError(f"Shader compilation failed:\n{error}")
return shader
def create_program(self, vertex_source: str, fragment_source: str) -> int:
"""Create and link a shader program."""
gl = self._gl
compile = self.compile_shader
vertex_shader = compile(vertex_source, gl.GL_VERTEX_SHADER)
try:
fragment_shader = compile(fragment_source, gl.GL_FRAGMENT_SHADER)
except RuntimeError:
gl.glDeleteShader(vertex_shader)
raise
program = gl.glCreateProgram()
gl.glAttachShader(program, vertex_shader)
gl.glAttachShader(program, fragment_shader)
gl.glLinkProgram(program)
gl.glDeleteShader(vertex_shader)
gl.glDeleteShader(fragment_shader)
if gl.glGetProgramiv(program, gl.GL_LINK_STATUS) != gl.GL_TRUE:
error = gl.glGetProgramInfoLog(program).decode()
gl.glDeleteProgram(program)
raise RuntimeError(f"Program linking failed:\n{error}")
return program
def render_shader_batch(
self,
fragment_code: str,
width: int,
height: int,
image_batches: list[list[np.ndarray]],
floats: list[float],
ints: list[int],
) -> list[list[np.ndarray]]:
"""
Render a fragment shader for multiple batches efficiently.
Compiles shader once, reuses framebuffer/textures across batches.
Supports multi-pass rendering via #pragma passes N directive.
Args:
fragment_code: User's fragment shader code
width: Output width
height: Output height
image_batches: List of batches, each batch is a list of input images (H, W, C) float32 [0,1]
floats: List of float uniforms
ints: List of int uniforms
Returns:
List of batch outputs, each is a list of output images (H, W, 4) float32 [0,1]
"""
import time
gl = self._gl
start_time = time.perf_counter()
if not image_batches:
return []
self.make_current()
# Convert from GLSL ES to desktop GLSL 330
fragment_source = _convert_es_to_desktop(fragment_code)
# Detect how many outputs the shader actually uses
num_outputs = _detect_output_count(fragment_code)
# Detect multi-pass rendering
num_passes = _detect_pass_count(fragment_code)
# Track resources for cleanup
program = None
fbo = None
output_textures = []
input_textures = []
ping_pong_textures = []
ping_pong_fbos = []
num_inputs = len(image_batches[0])
try:
# Compile shaders (once for all batches)
try:
program = self.create_program(VERTEX_SHADER, fragment_source)
except RuntimeError:
logger.error(f"Fragment shader:\n{fragment_source}")
raise
gl.glUseProgram(program)
# Create framebuffer with only the needed color attachments
fbo = gl.glGenFramebuffers(1)
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
draw_buffers = []
for i in range(num_outputs):
tex = gl.glGenTextures(1)
output_textures.append(tex)
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0 + i, gl.GL_TEXTURE_2D, tex, 0)
draw_buffers.append(gl.GL_COLOR_ATTACHMENT0 + i)
gl.glDrawBuffers(num_outputs, draw_buffers)
if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
raise RuntimeError("Framebuffer is not complete")
# Create ping-pong resources for multi-pass rendering
if num_passes > 1:
for _ in range(2):
pp_tex = gl.glGenTextures(1)
ping_pong_textures.append(pp_tex)
gl.glBindTexture(gl.GL_TEXTURE_2D, pp_tex)
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
pp_fbo = gl.glGenFramebuffers(1)
ping_pong_fbos.append(pp_fbo)
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, pp_fbo)
gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, gl.GL_TEXTURE_2D, pp_tex, 0)
gl.glDrawBuffers(1, [gl.GL_COLOR_ATTACHMENT0])
if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
raise RuntimeError("Ping-pong framebuffer is not complete")
# Create input textures (reused for all batches)
for i in range(num_inputs):
tex = gl.glGenTextures(1)
input_textures.append(tex)
gl.glActiveTexture(gl.GL_TEXTURE0 + i)
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
loc = gl.glGetUniformLocation(program, f"u_image{i}")
if loc >= 0:
gl.glUniform1i(loc, i)
# Set static uniforms (once for all batches)
loc = gl.glGetUniformLocation(program, "u_resolution")
if loc >= 0:
gl.glUniform2f(loc, float(width), float(height))
for i, v in enumerate(floats):
loc = gl.glGetUniformLocation(program, f"u_float{i}")
if loc >= 0:
gl.glUniform1f(loc, v)
for i, v in enumerate(ints):
loc = gl.glGetUniformLocation(program, f"u_int{i}")
if loc >= 0:
gl.glUniform1i(loc, v)
# Get u_pass uniform location for multi-pass
pass_loc = gl.glGetUniformLocation(program, "u_pass")
gl.glViewport(0, 0, width, height)
gl.glDisable(gl.GL_BLEND) # Ensure no alpha blending - write output directly
# Process each batch
all_batch_outputs = []
for images in image_batches:
# Update input textures with this batch's images
for i, img in enumerate(images):
gl.glActiveTexture(gl.GL_TEXTURE0 + i)
gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[i])
# Flip vertically for GL coordinates, ensure RGBA
h, w, c = img.shape
if c == 3:
img_upload = np.empty((h, w, 4), dtype=np.float32)
img_upload[:, :, :3] = img[::-1, :, :]
img_upload[:, :, 3] = 1.0
else:
img_upload = np.ascontiguousarray(img[::-1, :, :])
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, w, h, 0, gl.GL_RGBA, gl.GL_FLOAT, img_upload)
if num_passes == 1:
# Single pass - render directly to output FBO
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
if pass_loc >= 0:
gl.glUniform1i(pass_loc, 0)
gl.glClearColor(0, 0, 0, 0)
gl.glClear(gl.GL_COLOR_BUFFER_BIT)
gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)
else:
# Multi-pass rendering with ping-pong
for p in range(num_passes):
is_last_pass = (p == num_passes - 1)
# Set pass uniform
if pass_loc >= 0:
gl.glUniform1i(pass_loc, p)
if is_last_pass:
# Last pass renders to the main output FBO
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
else:
# Intermediate passes render to ping-pong FBO
target_fbo = ping_pong_fbos[p % 2]
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, target_fbo)
# Set input texture for this pass
gl.glActiveTexture(gl.GL_TEXTURE0)
if p == 0:
# First pass reads from original input
gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[0])
else:
# Subsequent passes read from previous pass output
source_tex = ping_pong_textures[(p - 1) % 2]
gl.glBindTexture(gl.GL_TEXTURE_2D, source_tex)
gl.glClearColor(0, 0, 0, 0)
gl.glClear(gl.GL_COLOR_BUFFER_BIT)
gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)
# Read back outputs for this batch
# (glGetTexImage is synchronous, implicitly waits for rendering)
batch_outputs = []
for tex in output_textures:
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
data = gl.glGetTexImage(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, gl.GL_FLOAT)
img = np.frombuffer(data, dtype=np.float32).reshape(height, width, 4)
batch_outputs.append(img[::-1, :, :].copy())
# Pad with black images for unused outputs
black_img = np.zeros((height, width, 4), dtype=np.float32)
for _ in range(num_outputs, MAX_OUTPUTS):
batch_outputs.append(black_img)
all_batch_outputs.append(batch_outputs)
elapsed = (time.perf_counter() - start_time) * 1000
num_batches = len(image_batches)
pass_info = f", {num_passes} passes" if num_passes > 1 else ""
logger.info(f"GLSL shader executed in {elapsed:.1f}ms ({num_batches} batch{'es' if num_batches != 1 else ''}, {width}x{height}{pass_info})")
return all_batch_outputs
finally:
# Unbind before deleting
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
gl.glUseProgram(0)
for tex in input_textures:
gl.glDeleteTextures(tex)
for tex in output_textures:
gl.glDeleteTextures(tex)
for tex in ping_pong_textures:
gl.glDeleteTextures(tex)
if fbo is not None:
gl.glDeleteFramebuffers(1, [fbo])
for pp_fbo in ping_pong_fbos:
gl.glDeleteFramebuffers(1, [pp_fbo])
if program is not None:
gl.glDeleteProgram(program)
##########
class _GLContextGLFW(GLContext):
"""Concrete GLContext using GLFW backend."""
@classmethod
def backend_name(cls) -> str:
return "GLFW"
def _init_backend_concrete(self):
"""Initialize GLFW. Raises RuntimeError on failure."""
logger.debug("_init_backend_concrete (GLFW): starting")
# On macOS, glfw.init() must be called from main thread or it hangs forever
if sys.platform == "darwin":
logger.debug("_init_backend_concrete (GLFW): skipping on macOS")
raise RuntimeError("GLFW backend not supported on macOS")
logger.debug("_init_backend_concrete (GLFW): importing glfw module")
import glfw
logger.debug("_init_backend_concrete (GLFW): calling glfw.init()")
if not glfw.init():
raise RuntimeError("glfw.init() failed")
try:
logger.debug("_init_backend_concrete (GLFW): setting window hints")
glfw.window_hint(glfw.VISIBLE, glfw.FALSE)
glfw.window_hint(glfw.CONTEXT_VERSION_MAJOR, 3)
glfw.window_hint(glfw.CONTEXT_VERSION_MINOR, 3)
glfw.window_hint(glfw.OPENGL_PROFILE, glfw.OPENGL_CORE_PROFILE)
logger.debug("_init_backend_concrete (GLFW): calling create_window()")
window = glfw.create_window(64, 64, "ComfyUI GLSL", None, None)
if not window:
raise RuntimeError("glfw.create_window() failed")
logger.debug("_init_backend_concrete (GLFW): calling make_context_current()")
glfw.make_context_current(window)
except Exception:
logger.debug("_init_backend_concrete (GLFW): failed, terminating glfw")
glfw.terminate()
raise
self._window = window
self._glfw = glfw
logger.debug("_init_backend_concrete (GLFW): completed successfully")
def _make_current_concrete(self):
self._glfw.make_context_current(self._window)
##########
class _GLContextEGL(GLContext):
"""Concrete GLContext using EGL backend."""
@classmethod
def backend_name(cls) -> str:
return "EGL"
def _init_backend_concrete(self):
"""Initialize EGL for headless rendering. Raises RuntimeError on failure."""
logger.debug("_init_backend_concrete (EGL): starting")
from OpenGL import EGL
logger.debug("_init_backend_concrete (EGL): imports completed")
display = None
context = None
surface = None
try:
logger.debug("_init_backend_concrete (EGL): calling eglGetDisplay()")
display = EGL.eglGetDisplay(EGL.EGL_DEFAULT_DISPLAY)
if display == EGL.EGL_NO_DISPLAY:
raise RuntimeError("eglGetDisplay() failed")
logger.debug("_init_backend_concrete (EGL): calling eglInitialize()")
major, minor = EGL.EGLint(), EGL.EGLint()
if not EGL.eglInitialize(display, major, minor):
display = None # Not initialized, don't terminate
raise RuntimeError("eglInitialize() failed")
logger.debug(f"_init_backend_concrete (EGL): EGL version {major.value}.{minor.value}")
config_attribs = [
EGL.EGL_SURFACE_TYPE, EGL.EGL_PBUFFER_BIT,
EGL.EGL_RENDERABLE_TYPE, EGL.EGL_OPENGL_BIT,
EGL.EGL_RED_SIZE, 8, EGL.EGL_GREEN_SIZE, 8, EGL.EGL_BLUE_SIZE, 8, EGL.EGL_ALPHA_SIZE, 8,
EGL.EGL_DEPTH_SIZE, 0, EGL.EGL_NONE
]
configs = (EGL.EGLConfig * 1)()
num_configs = EGL.EGLint()
if not EGL.eglChooseConfig(display, config_attribs, configs, 1, num_configs) or num_configs.value == 0:
raise RuntimeError("eglChooseConfig() failed")
config = configs[0]
logger.debug(f"_init_backend_concrete (EGL): config chosen, num_configs={num_configs.value}")
if not EGL.eglBindAPI(EGL.EGL_OPENGL_API):
raise RuntimeError("eglBindAPI() failed")
logger.debug("_init_backend_concrete (EGL): calling eglCreateContext()")
context_attribs = [
EGL.EGL_CONTEXT_MAJOR_VERSION, 3,
EGL.EGL_CONTEXT_MINOR_VERSION, 3,
EGL.EGL_CONTEXT_OPENGL_PROFILE_MASK, EGL.EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
EGL.EGL_NONE
]
context = EGL.eglCreateContext(display, config, EGL.EGL_NO_CONTEXT, context_attribs)
if context == EGL.EGL_NO_CONTEXT:
raise RuntimeError("eglCreateContext() failed")
logger.debug("_init_backend_concrete (EGL): calling eglCreatePbufferSurface()")
pbuffer_attribs = [EGL.EGL_WIDTH, 64, EGL.EGL_HEIGHT, 64, EGL.EGL_NONE]
surface = EGL.eglCreatePbufferSurface(display, config, pbuffer_attribs)
if surface == EGL.EGL_NO_SURFACE:
raise RuntimeError("eglCreatePbufferSurface() failed")
logger.debug("_init_backend_concrete (EGL): calling eglMakeCurrent()")
if not EGL.eglMakeCurrent(display, surface, surface, context):
raise RuntimeError("eglMakeCurrent() failed")
except Exception:
logger.debug("_init_backend_concrete (EGL): failed, cleaning up")
# Clean up any resources on failure
if surface is not None:
EGL.eglDestroySurface(display, surface)
if context is not None:
EGL.eglDestroyContext(display, context)
if display is not None:
EGL.eglTerminate(display)
raise
self._egl_display = display
self._egl_context = context
self._egl_surface = surface
self._EGL = EGL
self._eglMakeCurrent = EGL.eglMakeCurrent
logger.debug("_init_backend_concrete (EGL): completed successfully")
def _make_current_concrete(self):
self._eglMakeCurrent(self._egl_display, self._egl_surface, self._egl_surface, self._egl_context)
##########
class _GLContextOSMesa(GLContext):
"""Concrete GLContext using OSMesa backend."""
@classmethod
def backend_name(cls) -> str:
return "OSMesa"
def _init_backend_concrete(self):
"""Initialize OSMesa for software rendering. Returns (context, buffer). Raises RuntimeError on failure."""
import ctypes
logger.debug("_init_backend_concrete (OSMesa): starting")
os.environ["PYOPENGL_PLATFORM"] = "osmesa"
logger.debug("_init_backend_concrete (OSMesa): importing OpenGL.osmesa")
from OpenGL import GL as _gl
from OpenGL.osmesa import (
OSMesaCreateContextExt, OSMesaMakeCurrent, OSMesaDestroyContext,
OSMESA_RGBA,
)
logger.debug("_init_backend_concrete (OSMesa): imports completed")
ctx = OSMesaCreateContextExt(OSMESA_RGBA, 24, 0, 0, None)
if not ctx:
raise RuntimeError("OSMesaCreateContextExt() failed")
width, height = 64, 64
buffer = (ctypes.c_ubyte * (width * height * 4))()
logger.debug("_init_backend_concrete (OSMesa): calling OSMesaMakeCurrent()")
if not OSMesaMakeCurrent(ctx, buffer, _gl.GL_UNSIGNED_BYTE, width, height):
OSMesaDestroyContext(ctx)
raise RuntimeError("OSMesaMakeCurrent() failed")
self._osmesa_ctx = ctx
self._osmesa_buffer = buffer
logger.debug("_init_backend_concrete (OSMesa): completed successfully")
def _make_current_concrete(self):
from OpenGL.osmesa import OSMesaMakeCurrent
OSMesaMakeCurrent(self._osmesa_ctx, self._osmesa_buffer, self._gl.GL_UNSIGNED_BYTE, 64, 64)
############################################################
def _render_shader_batch(
fragment_code: str,
width: int,
height: int,
image_batches: list[list[np.ndarray]],
floats: list[float],
ints: list[int],
) -> list[list[np.ndarray]]:
"""
Render a fragment shader for multiple batches efficiently.
Compiles shader once, reuses framebuffer/textures across batches.
Supports multi-pass rendering via #pragma passes N directive.
Args:
fragment_code: User's fragment shader code
width: Output width
height: Output height
image_batches: List of batches, each batch is a list of input images (H, W, C) float32 [0,1]
floats: List of float uniforms
ints: List of int uniforms
Returns:
List of batch outputs, each is a list of output images (H, W, 4) float32 [0,1]
"""
import time
start_time = time.perf_counter()
if not image_batches:
return []
ctx = GLContext()
ctx.make_current()
# Convert from GLSL ES to desktop GLSL 330
fragment_source = _convert_es_to_desktop(fragment_code)
# Detect how many outputs the shader actually uses
num_outputs = _detect_output_count(fragment_code)
# Detect multi-pass rendering
num_passes = _detect_pass_count(fragment_code)
# Track resources for cleanup
program = None
fbo = None
output_textures = []
input_textures = []
ping_pong_textures = []
ping_pong_fbos = []
num_inputs = len(image_batches[0])
try:
# Compile shaders (once for all batches)
try:
program = ctx.create_program(VERTEX_SHADER, fragment_source)
except RuntimeError:
logger.error(f"Fragment shader:\n{fragment_source}")
raise
gl.glUseProgram(program)
# Create framebuffer with only the needed color attachments
fbo = gl.glGenFramebuffers(1)
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
draw_buffers = []
for i in range(num_outputs):
tex = gl.glGenTextures(1)
output_textures.append(tex)
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0 + i, gl.GL_TEXTURE_2D, tex, 0)
draw_buffers.append(gl.GL_COLOR_ATTACHMENT0 + i)
gl.glDrawBuffers(num_outputs, draw_buffers)
if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
raise RuntimeError("Framebuffer is not complete")
# Create ping-pong resources for multi-pass rendering
if num_passes > 1:
for _ in range(2):
pp_tex = gl.glGenTextures(1)
ping_pong_textures.append(pp_tex)
gl.glBindTexture(gl.GL_TEXTURE_2D, pp_tex)
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
pp_fbo = gl.glGenFramebuffers(1)
ping_pong_fbos.append(pp_fbo)
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, pp_fbo)
gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, gl.GL_TEXTURE_2D, pp_tex, 0)
gl.glDrawBuffers(1, [gl.GL_COLOR_ATTACHMENT0])
if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
raise RuntimeError("Ping-pong framebuffer is not complete")
# Create input textures (reused for all batches)
for i in range(num_inputs):
tex = gl.glGenTextures(1)
input_textures.append(tex)
gl.glActiveTexture(gl.GL_TEXTURE0 + i)
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
loc = gl.glGetUniformLocation(program, f"u_image{i}")
if loc >= 0:
gl.glUniform1i(loc, i)
# Set static uniforms (once for all batches)
loc = gl.glGetUniformLocation(program, "u_resolution")
if loc >= 0:
gl.glUniform2f(loc, float(width), float(height))
for i, v in enumerate(floats):
loc = gl.glGetUniformLocation(program, f"u_float{i}")
if loc >= 0:
gl.glUniform1f(loc, v)
for i, v in enumerate(ints):
loc = gl.glGetUniformLocation(program, f"u_int{i}")
if loc >= 0:
gl.glUniform1i(loc, v)
# Get u_pass uniform location for multi-pass
pass_loc = gl.glGetUniformLocation(program, "u_pass")
gl.glViewport(0, 0, width, height)
gl.glDisable(gl.GL_BLEND) # Ensure no alpha blending - write output directly
# Process each batch
all_batch_outputs = []
for images in image_batches:
# Update input textures with this batch's images
for i, img in enumerate(images):
gl.glActiveTexture(gl.GL_TEXTURE0 + i)
gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[i])
# Flip vertically for GL coordinates, ensure RGBA
h, w, c = img.shape
if c == 3:
img_upload = np.empty((h, w, 4), dtype=np.float32)
img_upload[:, :, :3] = img[::-1, :, :]
img_upload[:, :, 3] = 1.0
else:
img_upload = np.ascontiguousarray(img[::-1, :, :])
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, w, h, 0, gl.GL_RGBA, gl.GL_FLOAT, img_upload)
if num_passes == 1:
# Single pass - render directly to output FBO
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
if pass_loc >= 0:
gl.glUniform1i(pass_loc, 0)
gl.glClearColor(0, 0, 0, 0)
gl.glClear(gl.GL_COLOR_BUFFER_BIT)
gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)
else:
# Multi-pass rendering with ping-pong
for p in range(num_passes):
is_last_pass = (p == num_passes - 1)
# Set pass uniform
if pass_loc >= 0:
gl.glUniform1i(pass_loc, p)
if is_last_pass:
# Last pass renders to the main output FBO
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
else:
# Intermediate passes render to ping-pong FBO
target_fbo = ping_pong_fbos[p % 2]
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, target_fbo)
# Set input texture for this pass
gl.glActiveTexture(gl.GL_TEXTURE0)
if p == 0:
# First pass reads from original input
gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[0])
else:
# Subsequent passes read from previous pass output
source_tex = ping_pong_textures[(p - 1) % 2]
gl.glBindTexture(gl.GL_TEXTURE_2D, source_tex)
gl.glClearColor(0, 0, 0, 0)
gl.glClear(gl.GL_COLOR_BUFFER_BIT)
gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)
# Read back outputs for this batch
# (glGetTexImage is synchronous, implicitly waits for rendering)
batch_outputs = []
for tex in output_textures:
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
data = gl.glGetTexImage(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, gl.GL_FLOAT)
img = np.frombuffer(data, dtype=np.float32).reshape(height, width, 4)
batch_outputs.append(img[::-1, :, :].copy())
# Pad with black images for unused outputs
black_img = np.zeros((height, width, 4), dtype=np.float32)
for _ in range(num_outputs, MAX_OUTPUTS):
batch_outputs.append(black_img)
all_batch_outputs.append(batch_outputs)
elapsed = (time.perf_counter() - start_time) * 1000
num_batches = len(image_batches)
pass_info = f", {num_passes} passes" if num_passes > 1 else ""
logger.info(f"GLSL shader executed in {elapsed:.1f}ms ({num_batches} batch{'es' if num_batches != 1 else ''}, {width}x{height}{pass_info})")
return all_batch_outputs
finally:
# Unbind before deleting
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
gl.glUseProgram(0)
for tex in input_textures:
gl.glDeleteTextures(tex)
for tex in output_textures:
gl.glDeleteTextures(tex)
for tex in ping_pong_textures:
gl.glDeleteTextures(tex)
if fbo is not None:
gl.glDeleteFramebuffers(1, [fbo])
for pp_fbo in ping_pong_fbos:
gl.glDeleteFramebuffers(1, [pp_fbo])
if program is not None:
gl.glDeleteProgram(program)
class GLSLShader(io.ComfyNode):
@classmethod
def define_schema(cls) -> io.Schema:
image_template = io.Autogrow.TemplatePrefix(
io.Image.Input("image"),
prefix="image",
min=1,
max=MAX_IMAGES,
)
float_template = io.Autogrow.TemplatePrefix(
io.Float.Input("float", default=0.0),
prefix="u_float",
min=0,
max=MAX_UNIFORMS,
)
int_template = io.Autogrow.TemplatePrefix(
io.Int.Input("int", default=0),
prefix="u_int",
min=0,
max=MAX_UNIFORMS,
)
return io.Schema(
node_id="GLSLShader",
display_name="GLSL Shader",
category="image/shader",
description=(
"Apply GLSL ES fragment shaders to images. "
"u_resolution (vec2) is always available."
),
inputs=[
io.String.Input(
"fragment_shader",
default=DEFAULT_FRAGMENT_SHADER,
multiline=True,
tooltip="GLSL fragment shader source code (GLSL ES 3.00 / WebGL 2.0 compatible)",
),
io.DynamicCombo.Input(
"size_mode",
options=[
io.DynamicCombo.Option("from_input", []),
io.DynamicCombo.Option(
"custom",
[
io.Int.Input(
"width",
default=512,
min=1,
max=nodes.MAX_RESOLUTION,
),
io.Int.Input(
"height",
default=512,
min=1,
max=nodes.MAX_RESOLUTION,
),
],
),
],
tooltip="Output size: 'from_input' uses first input image dimensions, 'custom' allows manual size",
),
io.Autogrow.Input("images", template=image_template, tooltip=f"Images are available as u_image0-{MAX_IMAGES-1} (sampler2D) in the shader code"),
io.Autogrow.Input("floats", template=float_template, tooltip=f"Floats are available as u_float0-{MAX_UNIFORMS-1} in the shader code"),
io.Autogrow.Input("ints", template=int_template, tooltip=f"Ints are available as u_int0-{MAX_UNIFORMS-1} in the shader code"),
],
outputs=[
io.Image.Output(display_name="IMAGE0", tooltip="Available via layout(location = 0) out vec4 fragColor0 in the shader code"),
io.Image.Output(display_name="IMAGE1", tooltip="Available via layout(location = 1) out vec4 fragColor1 in the shader code"),
io.Image.Output(display_name="IMAGE2", tooltip="Available via layout(location = 2) out vec4 fragColor2 in the shader code"),
io.Image.Output(display_name="IMAGE3", tooltip="Available via layout(location = 3) out vec4 fragColor3 in the shader code"),
],
)
@classmethod
def execute(
cls,
fragment_shader: str,
size_mode: SizeModeInput,
images: io.Autogrow.Type,
floats: io.Autogrow.Type = None,
ints: io.Autogrow.Type = None,
**kwargs,
) -> io.NodeOutput:
image_list = [v for v in images.values() if v is not None]
float_list = (
[v if v is not None else 0.0 for v in floats.values()] if floats else []
)
int_list = [v if v is not None else 0 for v in ints.values()] if ints else []
if not image_list:
raise ValueError("At least one input image is required")
# Determine output dimensions
if size_mode["size_mode"] == "custom":
out_width = size_mode["width"]
out_height = size_mode["height"]
else:
out_height, out_width = image_list[0].shape[1:3]
batch_size = image_list[0].shape[0]
# Prepare batches
image_batches = []
for batch_idx in range(batch_size):
batch_images = [img_tensor[batch_idx].cpu().numpy().astype(np.float32) for img_tensor in image_list]
image_batches.append(batch_images)
all_batch_outputs = _render_shader_batch(
fragment_shader,
out_width,
out_height,
image_batches,
float_list,
int_list,
)
# Collect outputs into tensors
all_outputs = [[] for _ in range(MAX_OUTPUTS)]
for batch_outputs in all_batch_outputs:
for i, out_img in enumerate(batch_outputs):
all_outputs[i].append(torch.from_numpy(out_img))
output_tensors = [torch.stack(all_outputs[i], dim=0) for i in range(MAX_OUTPUTS)]
return io.NodeOutput(
*output_tensors,
ui=cls._build_ui_output(image_list, output_tensors[0]),
)
@classmethod
def _build_ui_output(
cls, image_list: list[torch.Tensor], output_batch: torch.Tensor
) -> dict[str, list]:
"""Build UI output with input and output images for client-side shader execution."""
combined_inputs = torch.cat(image_list, dim=0)
input_images_ui = ui.ImageSaveHelper.save_images(
combined_inputs,
filename_prefix="GLSLShader_input",
folder_type=io.FolderType.temp,
cls=None,
compress_level=1,
)
output_images_ui = ui.ImageSaveHelper.save_images(
output_batch,
filename_prefix="GLSLShader_output",
folder_type=io.FolderType.temp,
cls=None,
compress_level=1,
)
return {"input_images": input_images_ui, "images": output_images_ui}
class GLSLExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [GLSLShader]
async def comfy_entrypoint() -> GLSLExtension:
return GLSLExtension()