From e51a83fc8ce2c5f8ecc7c6f6b2a3f3583d9b5f76 Mon Sep 17 00:00:00 2001 From: ULT7RA Date: Sun, 12 Apr 2026 21:46:16 -0600 Subject: [PATCH] Add FLUXNATION fused spike attention CUDA kernel for FLUX.1 (#2766) Co-authored-by: ULT7RA Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- custom-node-list.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/custom-node-list.json b/custom-node-list.json index 88f1b482..a4ac4f6d 100644 --- a/custom-node-list.json +++ b/custom-node-list.json @@ -1,5 +1,16 @@ { "custom_nodes": [ + { + "author": "ULT7RA", + "title": "FLUXNATION FLUX.1 Fused Neuromorphic SPIKE Attention & Step Cache CUDA Kernel", + "id": "fluxnation", + "reference": "https://github.com/ULT7RA/FLUXNATION", + "files": [ + "https://github.com/ULT7RA/FLUXNATION" + ], + "install_type": "git-clone", + "description": "FLUXNATION is a fused FP8 CUDA kernel for FLUX.1 that replaces the entire SingleStreamBlock forward pass with a single torch.ops call covering modulation, dual GEMMs, QKV projection, RoPE attention, gating, and residual all in one shot. On top of that it ships with neuromorphic spike attention, which scores every attention block by real dot product similarity and kills the ones that don't matter after image structure has formed, keeping only the top 45 percent. Step caching then replays the cached output on alternating spike steps for zero attention compute every other step. The result is 30 percent faster generation than stock ComfyUI on an RTX 4090 with no quality loss. Works with every sampler ComfyUI supports at any step count. FP16 ports for 10-series, 20-series, and 30-series GPUs are included. A must-have for anyone running FLUX.1." + }, { "author": "Dr.Lt.Data", "title": "ComfyUI-Manager",