Update graph.py

Miklos Nagy 2025-11-12 15:14:45 +01:00 committed by GitHub
parent 1199411747
commit 09c250184d

@@ -1,6 +1,12 @@
# graph.py — grouped/batched scheduler on top of the updated ExecutionList
# Implements model-class batching to reduce device/context swaps while preserving
# the new execution_cache behavior added upstream.
from __future__ import annotations
from typing import Type, Literal, Optional
import os
import nodes
import asyncio
import inspect
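
A minimal standalone sketch of the batching policy the header describes: group
the ready nodes by class and serve the largest group first, so same-class nodes
run back-to-back. The helper name, node ids, and class names are hypothetical,
not part of this commit:

    from collections import Counter

    def pick_batch_class(ready_ids, class_of):
        # Largest class first; ties broken by class name for determinism.
        counts = Counter(class_of[nid] for nid in ready_ids)
        return max(counts.items(), key=lambda kv: (kv[1], kv[0]))[0]

    class_of = {"1": "KSampler", "2": "KSampler", "3": "VAEDecode"}
    assert pick_batch_class(["1", "2", "3"], class_of) == "KSampler"
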
@@ -10,15 +16,19 @@ from comfy.comfy_types.node_typing import ComfyNodeABC, InputTypeDict, InputType
# NOTE: ExecutionBlocker code got moved to graph_utils.py to prevent torch being imported too soon during unit tests
ExecutionBlocker = ExecutionBlocker

class DependencyCycleError(Exception):
    pass

class NodeInputError(Exception):
    pass

class NodeNotFoundError(Exception):
    pass

class DynamicPrompt:
    def __init__(self, original_prompt):
        # The original prompt provided by the user
@@ -62,6 +72,7 @@ class DynamicPrompt:
    def get_original_prompt(self):
        return self.original_prompt

def get_input_info(
    class_def: Type[ComfyNodeABC],
    input_name: str,
@@ -99,6 +110,7 @@ def get_input_info(
        extra_info = {}
    return input_type, input_category, extra_info

class TopologicalSort:
    def __init__(self, dynprompt):
        self.dynprompt = dynprompt
@@ -165,6 +177,7 @@ class TopologicalSort:
        assert node_id in self.blockCount, "Can't add external block to a node that isn't pending"
        self.externalBlocks += 1
        self.blockCount[node_id] += 1
        def unblock():
            self.externalBlocks -= 1
            self.blockCount[node_id] -= 1
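
The nested unblock closure is how callers release an external block. A minimal
usage sketch, assuming add_external_block returns the closure (the hunk is
truncated here, so that return is inferred; the node id is hypothetical):

    unblock = execution_list.add_external_block("42")
    # ... some work outside the graph completes ...
    unblock()  # "42" becomes schedulable once its remaining blockers clear
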
@@ -186,31 +199,44 @@
    def is_empty(self):
        return len(self.pendingNodes) == 0
class ExecutionList(TopologicalSort):
    """
    ExecutionList implements a topological dissolve of the graph with batching.
    After a node is staged for execution, it can still be returned to the graph
    after having further dependencies added.

    Batching: we favor running nodes of the same class_type back-to-back
    to reduce device/context thrash (e.g., model swaps). Within a batch we still
    apply UX-friendly priorities (output/async early, VAEDecode -> preview, etc.).
    """
    def __init__(self, dynprompt, output_cache):
        super().__init__(dynprompt)
        self.output_cache = output_cache
        self.staged_node_id: Optional[str] = None
        # Upstream execution cache (kept intact)
        self.execution_cache = {}
        self.execution_cache_listeners = {}
        # Batching state
        self._current_group_class: Optional[str] = None

    # ----------------------------- cache ---------------------------------
    def is_cached(self, node_id):
        return self.output_cache.get(node_id) is not None

    def cache_link(self, from_node_id, to_node_id):
        if to_node_id not in self.execution_cache:
            self.execution_cache[to_node_id] = {}
        self.execution_cache[to_node_id][from_node_id] = self.output_cache.get(from_node_id)
        if from_node_id not in self.execution_cache_listeners:
            self.execution_cache_listeners[from_node_id] = set()
        self.execution_cache_listeners[from_node_id].add(to_node_id)

    def get_cache(self, from_node_id, to_node_id):
        if to_node_id not in self.execution_cache:
            return None
        value = self.execution_cache[to_node_id].get(from_node_id)
        if value is None:
@@ -229,16 +255,93 @@ class ExecutionList(TopologicalSort):
        super().add_strong_link(from_node_id, from_socket, to_node_id)
        self.cache_link(from_node_id, to_node_id)
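
    # Illustrative note (editorial assumption, not in the diff): add_strong_link
    # both wires the dependency and snapshots the producer's currently cached
    # output for the consumer, so a later get_cache(from_node_id, to_node_id)
    # can serve that snapshot without another output_cache lookup.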
    # --------------------------- group utils ------------------------------
    def _pick_largest_group(self, node_list):
        """Return the class_type with the most representatives in node_list.
        Ties are resolved deterministically by class name."""
        counts = {}
        for nid in node_list:
            ctype = self.dynprompt.get_node(nid)["class_type"]
            counts[ctype] = counts.get(ctype, 0) + 1
        # max by (count, class_name) for a deterministic tie-break
        return max(counts.items(), key=lambda kv: (kv[1], kv[0]))[0]

    def _filter_by_group(self, node_list, group_cls):
        """Keep only nodes that belong to the given class."""
        return [nid for nid in node_list if self.dynprompt.get_node(nid)["class_type"] == group_cls]
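
    # Illustrative note: the tie-break above is deterministic. With two ready
    # "CLIPTextEncode" nodes and two ready "KSampler" nodes, max() compares
    # (2, "KSampler") > (2, "CLIPTextEncode"), so "KSampler" wins. The class
    # names here are examples only.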
    # ------------------------- node classification ------------------------
    def _is_output(self, node_id):
        class_type = self.dynprompt.get_node(node_id)["class_type"]
        class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
        return getattr(class_def, 'OUTPUT_NODE', False) is True

    def _is_async(self, node_id):
        class_type = self.dynprompt.get_node(node_id)["class_type"]
        class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
        return inspect.iscoroutinefunction(getattr(class_def, class_def.FUNCTION))
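
    # Illustrative note: class_def.FUNCTION names the node's entry point, so a
    # node class declaring FUNCTION = "execute" with `async def execute(...)`
    # is detected as async here. The name "execute" is an example only.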
    # ------------------------- UX within a batch --------------------------
    def _pick_in_batch_with_ux(self, candidates):
        """
        Original UX heuristics, but applied *within* the current batch.
        """
        # 1) Output nodes ASAP. This has no effect on the overall length of
        #    execution, but it feels better as a user for e.g. a PreviewImage
        #    to display a result as soon as it can.
        for nid in candidates:
            if self._is_output(nid):
                return nid
        # 1b) Async nodes early, so their awaits overlap with other work.
        for nid in candidates:
            if self._is_async(nid):
                return nid
        # 2) Decoder-before-preview pattern (within the batch):
        #    handles VAEDecode -> preview.
        for nid in candidates:
            for blocked in self.blocking[nid]:
                if self._is_output(blocked):
                    return nid
        # 3) VAELoader -> VAEDecode -> preview (within the batch)
        for nid in candidates:
            for blocked in self.blocking[nid]:
                for blocked2 in self.blocking[blocked]:
                    if self._is_output(blocked2):
                        return nid
        # 4) Otherwise, first candidate
        return candidates[0]
    # ------------------------- batch-aware picking ------------------------
    def ux_friendly_pick_node(self, available):
        """
        Choose which ready node to execute next, honoring the current batch.
        When the current batch runs dry, switch to the largest ready group.
        """
        # Ensure the current batch is still represented; otherwise pick a new largest group.
        has_current = (
            self._current_group_class is not None and
            any(self.dynprompt.get_node(nid)["class_type"] == self._current_group_class for nid in available)
        )
        if not has_current:
            new_group = self._pick_largest_group(available)
            self._current_group_class = new_group
        # Restrict to nodes of the current batch
        candidates = self._filter_by_group(available, self._current_group_class)
        return self._pick_in_batch_with_ux(candidates)
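
    # Illustrative note: with ready nodes {"4": "KSampler", "5": "KSampler",
    # "6": "PreviewImage"}, the largest group is KSampler, so both samplers run
    # before the batch switches and the preview (an output node) executes, even
    # though output nodes are prioritized within a batch. Node ids and classes
    # here are hypothetical.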
    # --------------------------- staging / run ----------------------------
    async def stage_node_execution(self):
        assert self.staged_node_id is None
        if self.is_empty():
            return None, None, None
        available = self.get_ready_nodes()
        # If nothing is ready but external blockers remain, wait for an unblock.
        while len(available) == 0 and self.externalBlocks > 0:
            await self.unblockedEvent.wait()
            self.unblockedEvent.clear()
            available = self.get_ready_nodes()
        if len(available) == 0:
            cycled_nodes = self.get_nodes_in_cycle()
            # Because cycles composed entirely of static nodes are caught during initial validation,
@@ -259,59 +362,25 @@ class ExecutionList(TopologicalSort):
            }
            return None, error_details, ex

        # Batch-aware pick
        self.staged_node_id = self.ux_friendly_pick_node(available)
        return self.staged_node_id, None, None
    def unstage_node_execution(self):
        # If a node execution resolves to PENDING, return it to the pool
        # but keep the current batch so we continue batching next time.
        assert self.staged_node_id is not None
        self.staged_node_id = None

    def complete_node_execution(self):
        node_id = self.staged_node_id
        self.pop_node(node_id)
        # Maintain current batch; it will switch automatically when empty.
        self.execution_cache.pop(node_id, None)
        self.execution_cache_listeners.pop(node_id, None)
        self.staged_node_id = None
    # ------------------------- cycle detection ----------------------------
    def get_nodes_in_cycle(self):
        # We'll dissolve the graph in reverse topological order to leave only the nodes in the cycle.
        # We're skipping some of the performance optimizations from the original TopologicalSort to keep
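
How the staging lifecycle composes, as a minimal driver sketch. Only the
ExecutionList method names come from this diff; run_node and the "PENDING"
sentinel are hypothetical stand-ins for the caller's executor and deferral
signal:

    async def run_node(node_id):
        # Stand-in executor; a real one would dispatch to the node's FUNCTION.
        return "DONE"

    async def drain(execution_list):
        while True:
            node_id, error_details, ex = await execution_list.stage_node_execution()
            if error_details is not None:
                raise ex  # dependency cycle reported by staging
            if node_id is None:
                break     # graph fully dissolved
            if await run_node(node_id) == "PENDING":
                execution_list.unstage_node_execution()  # back to the pool, batch kept
            else:
                execution_list.complete_node_execution()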