mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-25 00:17:23 +08:00
Pass per-device model to multigpu control clones in pre_run_control
QwenFunControlNet.pre_run stashes model.diffusion_model into extra_args, which the control_model then uses for forward passes (img_in, txt_in, pe_embedder, time_text_embed). With multigpu, every per-device control clone was being pre_run with the base model on GPU0, so secondary devices would invoke those modules with parameters on GPU0 and inputs on their own device, raising 'Expected all tensors to be on the same device'.

Build a device -> per-device BaseModel lookup from the patcher's additional multigpu models and pass each clone the model on its own device. Falls back to the base model when no per-device match is found (the single-GPU path, and the case where cnet.multigpu_clones lags the patcher's clone set).

Amp-Thread-ID: https://ampcode.com/threads/T-019e4a00-fe3d-76bd-a2f2-a8c8c4040082
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
parent
963621603c
commit
a18dd219d5
@ -870,14 +870,21 @@ def calculate_start_end_timesteps(model, conds):
|
|||||||
|
|
||||||
def pre_run_control(model, conds):
|
def pre_run_control(model, conds):
|
||||||
s = model.model_sampling
|
s = model.model_sampling
|
||||||
|
# Per-device model lookup so multigpu control clones get the matching
|
||||||
|
# diffusion_model (e.g. QwenFunControlNet stashes it into extra_args).
|
||||||
|
device_models: dict = {}
|
||||||
|
patcher = getattr(model, "current_patcher", None)
|
||||||
|
if patcher is not None:
|
||||||
|
for p in patcher.get_additional_models_with_key("multigpu"):
|
||||||
|
device_models[p.load_device] = p.model
|
||||||
for t in range(len(conds)):
|
for t in range(len(conds)):
|
||||||
x = conds[t]
|
x = conds[t]
|
||||||
|
|
||||||
percent_to_timestep_function = lambda a: s.percent_to_sigma(a)
|
percent_to_timestep_function = lambda a: s.percent_to_sigma(a)
|
||||||
if 'control' in x:
|
if 'control' in x:
|
||||||
x['control'].pre_run(model, percent_to_timestep_function)
|
x['control'].pre_run(model, percent_to_timestep_function)
|
||||||
for device_cnet in x['control'].multigpu_clones.values():
|
for device, device_cnet in x['control'].multigpu_clones.items():
|
||||||
device_cnet.pre_run(model, percent_to_timestep_function)
|
device_cnet.pre_run(device_models.get(device, model), percent_to_timestep_function)
|
||||||
|
|
||||||
def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
|
def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
|
||||||
cond_cnets = []
|
cond_cnets = []
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user