From e87aa1873f6a01148898b41dc3498ca6f82410d8 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Wed, 12 Apr 2023 19:36:35 -0600 Subject: [PATCH 01/45] Add slider setting type --- web/scripts/ui.js | 24 ++++++++++++++++++++++++ web/style.css | 8 ++++++++ 2 files changed, 32 insertions(+) diff --git a/web/scripts/ui.js b/web/scripts/ui.js index 09861c440..1c7fdc8a1 100644 --- a/web/scripts/ui.js +++ b/web/scripts/ui.js @@ -270,6 +270,30 @@ class ComfySettingsDialog extends ComfyDialog { ]), ]); break; + case "slider": + element = $el("div", [ + $el("label", { textContent: name }, [ + $el("input", { + type: "range", + value, + oninput: (e) => { + setter(e.target.value); + e.target.nextElementSibling.value = e.target.value; + }, + ...attrs + }), + $el("input", { + type: "number", + value, + oninput: (e) => { + setter(e.target.value); + e.target.previousElementSibling.value = e.target.value; + }, + ...attrs + }), + ]), + ]); + break; default: console.warn("Unsupported setting type, defaulting to text"); element = $el("div", [ diff --git a/web/style.css b/web/style.css index 34e31726c..e3b445762 100644 --- a/web/style.css +++ b/web/style.css @@ -217,6 +217,14 @@ button.comfy-queue-btn { z-index: 99; } +.comfy-modal.comfy-settings input[type="range"] { + vertical-align: middle; +} + +.comfy-modal.comfy-settings input[type="range"] + input[type="number"] { + width: 3.5em; +} + .comfy-modal input, .comfy-modal select { color: var(--input-text); From 8810e1b4b9e2c14860800a2bdc97d50d1aa2f904 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Wed, 12 Apr 2023 21:15:21 -0600 Subject: [PATCH 02/45] Fix indentation --- web/scripts/ui.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/scripts/ui.js b/web/scripts/ui.js index 1c7fdc8a1..6cbc9383e 100644 --- a/web/scripts/ui.js +++ b/web/scripts/ui.js @@ -270,7 +270,7 @@ class ComfySettingsDialog extends ComfyDialog { ]), ]); break; - case "slider": + case "slider": element = $el("div", [ $el("label", { textContent: name }, [ $el("input", { @@ -278,16 +278,16 @@ class ComfySettingsDialog extends ComfyDialog { value, oninput: (e) => { setter(e.target.value); - e.target.nextElementSibling.value = e.target.value; + e.target.nextElementSibling.value = e.target.value; }, ...attrs }), - $el("input", { + $el("input", { type: "number", value, oninput: (e) => { setter(e.target.value); - e.target.previousElementSibling.value = e.target.value; + e.target.previousElementSibling.value = e.target.value; }, ...attrs }), From 307ef543bf66e5ffd718b3a0b148c72287b65a89 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Thu, 13 Apr 2023 10:04:06 -0600 Subject: [PATCH 03/45] Change grid size to slider --- web/extensions/core/snapToGrid.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/extensions/core/snapToGrid.js b/web/extensions/core/snapToGrid.js index 20b245e18..cb5fc154b 100644 --- a/web/extensions/core/snapToGrid.js +++ b/web/extensions/core/snapToGrid.js @@ -9,7 +9,7 @@ app.registerExtension({ app.ui.settings.addSetting({ id: "Comfy.SnapToGrid.GridSize", name: "Grid Size", - type: "number", + type: "slider", attrs: { min: 1, max: 500, From 22bde7957e18e8f9c4fb206227a6117dae391417 Mon Sep 17 00:00:00 2001 From: Tomoaki Hayasaka Date: Mon, 17 Apr 2023 01:58:33 +0900 Subject: [PATCH 04/45] Fix "Ctrl+Enter doesn't work when textarea has focus" regression introduced in #491. --- web/extensions/core/keybinds.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/web/extensions/core/keybinds.js b/web/extensions/core/keybinds.js index 1825007a6..42c228017 100644 --- a/web/extensions/core/keybinds.js +++ b/web/extensions/core/keybinds.js @@ -5,12 +5,6 @@ app.registerExtension({ name: id, init() { const keybindListener = function(event) { - const target = event.composedPath()[0]; - - if (target.tagName === "INPUT" || target.tagName === "TEXTAREA") { - return; - } - const modifierPressed = event.ctrlKey || event.metaKey; // Queue prompt using ctrl or command + enter @@ -19,6 +13,12 @@ app.registerExtension({ return; } + const target = event.composedPath()[0]; + + if (target.tagName === "INPUT" || target.tagName === "TEXTAREA") { + return; + } + const modifierKeyIdMap = { "s": "#comfy-save-button", 83: "#comfy-save-button", From 0ab5c619eafa026d4be1a3f6bf462a6f7f9d25d6 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Apr 2023 01:04:54 -0400 Subject: [PATCH 05/45] Clarify in README that it's AMD GPUs. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f610f9497..be2cb8ec5 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Put your VAE in: models/vae At the time of writing this pytorch has issues with python versions higher than 3.10 so make sure your python/pip versions are 3.10. -### AMD (Linux only) +### AMD GPUs (Linux only) AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version: ```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.4.2``` From 884ea653c8d6fe19b3724f45a04a0d74cd881f2f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Apr 2023 11:05:15 -0400 Subject: [PATCH 06/45] Add a way for nodes to set a custom CFG function. --- comfy/samplers.py | 5 ++++- comfy/sd.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index ed36442a9..05af6fe88 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -211,7 +211,10 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con max_total_area = model_management.maximum_batch_area() cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options) - return uncond + (cond - uncond) * cond_scale + if "sampler_cfg_function" in model_options: + return model_options["sampler_cfg_function"](cond, uncond, cond_scale) + else: + return uncond + (cond - uncond) * cond_scale class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser): diff --git a/comfy/sd.py b/comfy/sd.py index 9c632e240..1d7774742 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -250,6 +250,9 @@ class ModelPatcher: def set_model_tomesd(self, ratio): self.model_options["transformer_options"]["tomesd"] = {"ratio": ratio} + def set_model_sampler_cfg_function(self, sampler_cfg_function): + self.model_options["sampler_cfg_function"] = sampler_cfg_function + def model_dtype(self): return self.model.diffusion_model.dtype From 6f7852bc47de2fa432672a1b93c1727c0824d78b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Apr 2023 17:24:58 -0400 Subject: [PATCH 07/45] Add a LatentFromBatch node to pick a single latent from a batch. Works before and after sampling. --- nodes.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/nodes.py b/nodes.py index c775da00c..c745ce280 100644 --- a/nodes.py +++ b/nodes.py @@ -510,6 +510,24 @@ class EmptyLatentImage: return ({"samples":latent}, ) +class LatentFromBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "batch_index": ("INT", {"default": 0, "min": 0, "max": 63}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "rotate" + + CATEGORY = "latent" + + def rotate(self, samples, batch_index): + s = samples.copy() + s_in = samples["samples"] + batch_index = min(s_in.shape[0] - 1, batch_index) + s["samples"] = s_in[batch_index:batch_index + 1].clone() + s["batch_index"] = batch_index + return (s,) class LatentUpscale: upscale_methods = ["nearest-exact", "bilinear", "area"] @@ -685,7 +703,13 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, if disable_noise: noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") else: - noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=torch.manual_seed(seed), device="cpu") + batch_index = 0 + if "batch_index" in latent: + batch_index = latent["batch_index"] + + generator = torch.manual_seed(seed) + for i in range(batch_index + 1): + noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") if "noise_mask" in latent: noise_mask = latent['noise_mask'] @@ -1073,6 +1097,7 @@ NODE_CLASS_MAPPINGS = { "VAELoader": VAELoader, "EmptyLatentImage": EmptyLatentImage, "LatentUpscale": LatentUpscale, + "LatentFromBatch": LatentFromBatch, "SaveImage": SaveImage, "PreviewImage": PreviewImage, "LoadImage": LoadImage, From 7b5eb196dbf4248eb6c67af2843cacb28863ce2f Mon Sep 17 00:00:00 2001 From: EllangoK Date: Mon, 17 Apr 2023 17:29:22 -0400 Subject: [PATCH 08/45] allows control arrow to edit attention in textarea --- web/extensions/core/editAttention.js | 117 +++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 web/extensions/core/editAttention.js diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js new file mode 100644 index 000000000..d943290ce --- /dev/null +++ b/web/extensions/core/editAttention.js @@ -0,0 +1,117 @@ +import { app } from "/scripts/app.js"; + +// Allows you to edit the attention weight by holding ctrl (or cmd) and using the up/down arrow keys + +const id = "Comfy.EditAttention"; +app.registerExtension({ +name:id, + init() { + function incrementWeight(weight, delta) { + const floatWeight = parseFloat(weight); + if (isNaN(floatWeight)) return weight; + const newWeight = floatWeight + delta; + if (newWeight < 0) return "0"; + return String(Number(newWeight.toFixed(10))); + } + + function findNearestEnclosure(text, cursorPos) { + let start = cursorPos, end = cursorPos; + let openCount = 0, closeCount = 0; + + // Find opening parenthesis before cursor + while (start >= 0) { + start--; + if (text[start] === "(" && openCount === closeCount) break; + if (text[start] === "(") openCount++; + if (text[start] === ")") closeCount++; + } + if (start < 0) return false; + + openCount = 0; + closeCount = 0; + + // Find closing parenthesis after cursor + while (end < text.length) { + if (text[end] === ")" && openCount === closeCount) break; + if (text[end] === "(") openCount++; + if (text[end] === ")") closeCount++; + end++; + } + if (end === text.length) return false; + + return { start: start + 1, end: end }; + } + + function addWeightToParentheses(text) { + const parenRegex = /^\((.*)\)$/; + const parenMatch = text.match(parenRegex); + + const floatRegex = /:([+-]?(\d*\.)?\d+([eE][+-]?\d+)?)/; + const floatMatch = text.match(floatRegex); + + if (parenMatch && !floatMatch) { + return `(${parenMatch[1]}:1.0)`; + } else { + return text; + } + }; + + function editAttention(event) { + const inputField = event.composedPath()[0]; + const delta = 0.1; + + if (inputField.tagName !== "TEXTAREA") return; + if (!(event.key === "ArrowUp" || event.key === "ArrowDown")) return; + if (!event.ctrlKey && !event.metaKey) return; + + event.preventDefault(); + + let start = inputField.selectionStart; + let end = inputField.selectionEnd; + let selectedText = inputField.value.substring(start, end); + + // If there is no selection, attempt to find the nearest enclosure + if (!selectedText) { + const nearestEnclosure = findNearestEnclosure(inputField.value, start); + if (nearestEnclosure) { + start = nearestEnclosure.start; + end = nearestEnclosure.end; + selectedText = inputField.value.substring(start, end); + } else { + return; + } + } + + // If the selection ends with a space, remove it + if (selectedText[selectedText.length - 1] === " ") { + selectedText = selectedText.substring(0, selectedText.length - 1); + end -= 1; + } + + // If there are parentheses left and right of the selection, select them + if (inputField.value[start - 1] === "(" && inputField.value[end] === ")") { + start -= 1; + end += 1; + selectedText = inputField.value.substring(start, end); + } + + // If the selection is not enclosed in parentheses, add them + if (selectedText[0] !== "(" || selectedText[selectedText.length - 1] !== ")") { + console.log("adding parentheses", inputField.value[start], inputField.value[end], selectedText); + selectedText = `(${selectedText})`; + } + + // If the selection does not have a weight, add a weight of 1.0 + selectedText = addWeightToParentheses(selectedText); + + // Increment the weight + const weightDelta = event.key === "ArrowUp" ? delta : -delta; + const updatedText = selectedText.replace(/(.*:)(\d+(\.\d+)?)(.*)/, (match, prefix, weight, _, suffix) => { + return prefix + incrementWeight(weight, weightDelta) + suffix; + }); + + inputField.setRangeText(updatedText, start, end, "select"); + } + window.addEventListener("keydown", editAttention); + }, +}); From f03dade5ab8f17a165d63efc205eb34a2330b7d8 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Apr 2023 18:19:57 -0400 Subject: [PATCH 09/45] Fix bug. --- nodes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nodes.py b/nodes.py index c745ce280..06b69f453 100644 --- a/nodes.py +++ b/nodes.py @@ -708,8 +708,9 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, batch_index = latent["batch_index"] generator = torch.manual_seed(seed) - for i in range(batch_index + 1): + for i in range(batch_index): noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") if "noise_mask" in latent: noise_mask = latent['noise_mask'] From b8c636b10d39e77742f3f435bf6b85c3aa806583 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Apr 2023 18:21:24 -0400 Subject: [PATCH 10/45] Lower how much CTRL+arrow key changes the number. --- web/extensions/core/editAttention.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index d943290ce..fe395c3ca 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -58,7 +58,7 @@ name:id, function editAttention(event) { const inputField = event.composedPath()[0]; - const delta = 0.1; + const delta = 0.025; if (inputField.tagName !== "TEXTAREA") return; if (!(event.key === "ArrowUp" || event.key === "ArrowDown")) return; From 79ba0399d8d70bc655269fc3318455a70d14e180 Mon Sep 17 00:00:00 2001 From: EllangoK Date: Mon, 17 Apr 2023 19:02:08 -0400 Subject: [PATCH 11/45] selects current word automatically --- web/extensions/core/editAttention.js | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index fe395c3ca..55201953b 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -70,7 +70,7 @@ name:id, let end = inputField.selectionEnd; let selectedText = inputField.value.substring(start, end); - // If there is no selection, attempt to find the nearest enclosure + // If there is no selection, attempt to find the nearest enclosure, or select the current word if (!selectedText) { const nearestEnclosure = findNearestEnclosure(inputField.value, start); if (nearestEnclosure) { @@ -78,7 +78,18 @@ name:id, end = nearestEnclosure.end; selectedText = inputField.value.substring(start, end); } else { - return; + // Select the current word, find the start and end of the word (first space before and after) + start = inputField.value.substring(0, start).lastIndexOf(" ") + 1; + end = inputField.value.substring(end).indexOf(" ") + end; + // Remove all punctuation at the end and beginning of the word + while (inputField.value[start].match(/[.,\/#!$%\^&\*;:{}=\-_`~()]/)) { + start++; + } + while (inputField.value[end - 1].match(/[.,\/#!$%\^&\*;:{}=\-_`~()]/)) { + end--; + } + selectedText = inputField.value.substring(start, end); + if (!selectedText) return; } } @@ -97,7 +108,6 @@ name:id, // If the selection is not enclosed in parentheses, add them if (selectedText[0] !== "(" || selectedText[selectedText.length - 1] !== ")") { - console.log("adding parentheses", inputField.value[start], inputField.value[end], selectedText); selectedText = `(${selectedText})`; } From a962222992479057b104cdd06bf399d2a2cae2fa Mon Sep 17 00:00:00 2001 From: EllangoK Date: Mon, 17 Apr 2023 23:40:44 -0400 Subject: [PATCH 12/45] correctly checks end of the text --- web/extensions/core/editAttention.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index 55201953b..206d0830a 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -79,8 +79,16 @@ name:id, selectedText = inputField.value.substring(start, end); } else { // Select the current word, find the start and end of the word (first space before and after) - start = inputField.value.substring(0, start).lastIndexOf(" ") + 1; - end = inputField.value.substring(end).indexOf(" ") + end; + const wordStart = inputField.value.substring(0, start).lastIndexOf(" ") + 1; + const wordEnd = inputField.value.substring(end).indexOf(" "); + // If there is no space after the word, select to the end of the string + if (wordEnd === -1) { + end = inputField.value.length; + } else { + end += wordEnd; + } + start = wordStart; + // Remove all punctuation at the end and beginning of the word while (inputField.value[start].match(/[.,\/#!$%\^&\*;:{}=\-_`~()]/)) { start++; From a7c7da68dc8a5e6bf1e316b6b36c4a61c7571445 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Tue, 18 Apr 2023 00:22:05 -0600 Subject: [PATCH 13/45] Editattention setting (#533) * Add editAttention delta setting * Update editAttention.js * Update web/extensions/core/editAttention.js Co-authored-by: Karun * Update editAttention.js * Update editAttention.js * Fix setting value --------- Co-authored-by: Karun --- web/extensions/core/editAttention.js | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index 206d0830a..66d4a8373 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -2,10 +2,21 @@ import { app } from "/scripts/app.js"; // Allows you to edit the attention weight by holding ctrl (or cmd) and using the up/down arrow keys -const id = "Comfy.EditAttention"; app.registerExtension({ -name:id, + name: "Comfy.EditAttention", init() { + const editAttentionDelta = app.ui.settings.addSetting({ + id: "Comfy.EditAttention.Delta", + name: "Ctrl+up/down precision", + type: "slider", + attrs: { + min: 0.01, + max: 2, + step: 0.01, + }, + defaultValue: 0.1, + }); + function incrementWeight(weight, delta) { const floatWeight = parseFloat(weight); if (isNaN(floatWeight)) return weight; @@ -58,7 +69,7 @@ name:id, function editAttention(event) { const inputField = event.composedPath()[0]; - const delta = 0.025; + const delta = parseFloat(editAttentionDelta.value); if (inputField.tagName !== "TEXTAREA") return; if (!(event.key === "ArrowUp" || event.key === "ArrowDown")) return; @@ -125,7 +136,7 @@ name:id, // Increment the weight const weightDelta = event.key === "ArrowUp" ? delta : -delta; const updatedText = selectedText.replace(/(.*:)(\d+(\.\d+)?)(.*)/, (match, prefix, weight, _, suffix) => { - return prefix + incrementWeight(weight, weightDelta) + suffix; + return prefix + incrementWeight(weight, weightDelta) + suffix; }); inputField.setRangeText(updatedText, start, end, "select"); From b016e2769f0a16fcba21c020023413cad68f704b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 18 Apr 2023 02:25:57 -0400 Subject: [PATCH 14/45] Saner range of values. --- web/extensions/core/editAttention.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index 66d4a8373..bebc80b12 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -11,10 +11,10 @@ app.registerExtension({ type: "slider", attrs: { min: 0.01, - max: 2, + max: 0.5, step: 0.01, }, - defaultValue: 0.1, + defaultValue: 0.05, }); function incrementWeight(weight, delta) { From 472b1cc0d881c4009e5a89e0893c5835f3a4c47d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 18 Apr 2023 19:34:07 -0400 Subject: [PATCH 15/45] Add a github action to use pip xformers package for dependencies. --- .../windows_release_cu118_dependencies_2.yml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/windows_release_cu118_dependencies_2.yml diff --git a/.github/workflows/windows_release_cu118_dependencies_2.yml b/.github/workflows/windows_release_cu118_dependencies_2.yml new file mode 100644 index 000000000..a88449527 --- /dev/null +++ b/.github/workflows/windows_release_cu118_dependencies_2.yml @@ -0,0 +1,30 @@ +name: "Windows Release cu118 dependencies 2" + +on: + workflow_dispatch: +# push: +# branches: +# - master + +jobs: + build_dependencies: + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10.9' + + - shell: bash + run: | + python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers==0.0.19.dev516 --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir + python -m pip install --no-cache-dir ./temp_wheel_dir/* + echo installed basic + ls -lah temp_wheel_dir + mv temp_wheel_dir cu118_python_deps + tar cf cu118_python_deps.tar cu118_python_deps + + - uses: actions/cache/save@v3 + with: + path: cu118_python_deps.tar + key: ${{ runner.os }}-build-cu118 From 3696d1699a6fece2485c063317cf65abbcddb79b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 19 Apr 2023 09:36:19 -0400 Subject: [PATCH 16/45] Add support for GLIGEN textbox model. --- comfy/gligen.py | 343 ++++++++++++++++++ comfy/ldm/modules/attention.py | 16 + .../modules/diffusionmodules/openaimodel.py | 2 + comfy/model_management.py | 6 +- comfy/samplers.py | 57 ++- comfy/sd.py | 22 +- folder_paths.py | 2 + models/gligen/put_gligen_models_here | 0 nodes.py | 71 +++- 9 files changed, 491 insertions(+), 28 deletions(-) create mode 100644 comfy/gligen.py create mode 100644 models/gligen/put_gligen_models_here diff --git a/comfy/gligen.py b/comfy/gligen.py new file mode 100644 index 000000000..8770383e5 --- /dev/null +++ b/comfy/gligen.py @@ -0,0 +1,343 @@ +import torch +from torch import nn, einsum +from ldm.modules.attention import CrossAttention +from inspect import isfunction + + +def exists(val): + return val is not None + + +def uniq(arr): + return{el: True for el in arr}.keys() + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# feedforward +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * torch.nn.functional.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential( + nn.Linear(dim, inner_dim), + nn.GELU() + ) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential( + project_in, + nn.Dropout(dropout), + nn.Linear(inner_dim, dim_out) + ) + + def forward(self, x): + return self.net(x) + + +class GatedCrossAttentionDense(nn.Module): + def __init__(self, query_dim, context_dim, n_heads, d_head): + super().__init__() + + self.attn = CrossAttention( + query_dim=query_dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head) + self.ff = FeedForward(query_dim, glu=True) + + self.norm1 = nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.))) + self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.))) + + # this can be useful: we can externally change magnitude of tanh(alpha) + # for example, when it is set to 0, then the entire model is same as + # original one + self.scale = 1 + + def forward(self, x, objs): + + x = x + self.scale * \ + torch.tanh(self.alpha_attn) * self.attn(self.norm1(x), objs, objs) + x = x + self.scale * \ + torch.tanh(self.alpha_dense) * self.ff(self.norm2(x)) + + return x + + +class GatedSelfAttentionDense(nn.Module): + def __init__(self, query_dim, context_dim, n_heads, d_head): + super().__init__() + + # we need a linear projection since we need cat visual feature and obj + # feature + self.linear = nn.Linear(context_dim, query_dim) + + self.attn = CrossAttention( + query_dim=query_dim, + context_dim=query_dim, + heads=n_heads, + dim_head=d_head) + self.ff = FeedForward(query_dim, glu=True) + + self.norm1 = nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.))) + self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.))) + + # this can be useful: we can externally change magnitude of tanh(alpha) + # for example, when it is set to 0, then the entire model is same as + # original one + self.scale = 1 + + def forward(self, x, objs): + + N_visual = x.shape[1] + objs = self.linear(objs) + + x = x + self.scale * torch.tanh(self.alpha_attn) * self.attn( + self.norm1(torch.cat([x, objs], dim=1)))[:, 0:N_visual, :] + x = x + self.scale * \ + torch.tanh(self.alpha_dense) * self.ff(self.norm2(x)) + + return x + + +class GatedSelfAttentionDense2(nn.Module): + def __init__(self, query_dim, context_dim, n_heads, d_head): + super().__init__() + + # we need a linear projection since we need cat visual feature and obj + # feature + self.linear = nn.Linear(context_dim, query_dim) + + self.attn = CrossAttention( + query_dim=query_dim, context_dim=query_dim, dim_head=d_head) + self.ff = FeedForward(query_dim, glu=True) + + self.norm1 = nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.))) + self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.))) + + # this can be useful: we can externally change magnitude of tanh(alpha) + # for example, when it is set to 0, then the entire model is same as + # original one + self.scale = 1 + + def forward(self, x, objs): + + B, N_visual, _ = x.shape + B, N_ground, _ = objs.shape + + objs = self.linear(objs) + + # sanity check + size_v = math.sqrt(N_visual) + size_g = math.sqrt(N_ground) + assert int(size_v) == size_v, "Visual tokens must be square rootable" + assert int(size_g) == size_g, "Grounding tokens must be square rootable" + size_v = int(size_v) + size_g = int(size_g) + + # select grounding token and resize it to visual token size as residual + out = self.attn(self.norm1(torch.cat([x, objs], dim=1)))[ + :, N_visual:, :] + out = out.permute(0, 2, 1).reshape(B, -1, size_g, size_g) + out = torch.nn.functional.interpolate( + out, (size_v, size_v), mode='bicubic') + residual = out.reshape(B, -1, N_visual).permute(0, 2, 1) + + # add residual to visual feature + x = x + self.scale * torch.tanh(self.alpha_attn) * residual + x = x + self.scale * \ + torch.tanh(self.alpha_dense) * self.ff(self.norm2(x)) + + return x + + +class FourierEmbedder(): + def __init__(self, num_freqs=64, temperature=100): + + self.num_freqs = num_freqs + self.temperature = temperature + self.freq_bands = temperature ** (torch.arange(num_freqs) / num_freqs) + + @torch.no_grad() + def __call__(self, x, cat_dim=-1): + "x: arbitrary shape of tensor. dim: cat dim" + out = [] + for freq in self.freq_bands: + out.append(torch.sin(freq * x)) + out.append(torch.cos(freq * x)) + return torch.cat(out, cat_dim) + + +class PositionNet(nn.Module): + def __init__(self, in_dim, out_dim, fourier_freqs=8): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + + self.fourier_embedder = FourierEmbedder(num_freqs=fourier_freqs) + self.position_dim = fourier_freqs * 2 * 4 # 2 is sin&cos, 4 is xyxy + + self.linears = nn.Sequential( + nn.Linear(self.in_dim + self.position_dim, 512), + nn.SiLU(), + nn.Linear(512, 512), + nn.SiLU(), + nn.Linear(512, out_dim), + ) + + self.null_positive_feature = torch.nn.Parameter( + torch.zeros([self.in_dim])) + self.null_position_feature = torch.nn.Parameter( + torch.zeros([self.position_dim])) + + def forward(self, boxes, masks, positive_embeddings): + B, N, _ = boxes.shape + masks = masks.unsqueeze(-1) + + # embedding position (it may includes padding as placeholder) + xyxy_embedding = self.fourier_embedder(boxes) # B*N*4 --> B*N*C + + # learnable null embedding + positive_null = self.null_positive_feature.view(1, 1, -1) + xyxy_null = self.null_position_feature.view(1, 1, -1) + + # replace padding with learnable null embedding + positive_embeddings = positive_embeddings * \ + masks + (1 - masks) * positive_null + xyxy_embedding = xyxy_embedding * masks + (1 - masks) * xyxy_null + + objs = self.linears( + torch.cat([positive_embeddings, xyxy_embedding], dim=-1)) + assert objs.shape == torch.Size([B, N, self.out_dim]) + return objs + + +class Gligen(nn.Module): + def __init__(self, modules, position_net, key_dim): + super().__init__() + self.module_list = nn.ModuleList(modules) + self.position_net = position_net + self.key_dim = key_dim + self.max_objs = 30 + + def _set_position(self, boxes, masks, positive_embeddings): + objs = self.position_net(boxes, masks, positive_embeddings) + + def func(key, x): + module = self.module_list[key] + return module(x, objs) + return func + + def set_position(self, latent_image_shape, position_params, device): + batch, c, h, w = latent_image_shape + masks = torch.zeros([self.max_objs], device="cpu") + boxes = [] + positive_embeddings = [] + for p in position_params: + x1 = (p[4]) / w + y1 = (p[3]) / h + x2 = (p[4] + p[2]) / w + y2 = (p[3] + p[1]) / h + masks[len(boxes)] = 1.0 + boxes += [torch.tensor((x1, y1, x2, y2)).unsqueeze(0)] + positive_embeddings += [p[0]] + append_boxes = [] + append_conds = [] + if len(boxes) < self.max_objs: + append_boxes = [torch.zeros( + [self.max_objs - len(boxes), 4], device="cpu")] + append_conds = [torch.zeros( + [self.max_objs - len(boxes), self.key_dim], device="cpu")] + + box_out = torch.cat( + boxes + append_boxes).unsqueeze(0).repeat(batch, 1, 1) + masks = masks.unsqueeze(0).repeat(batch, 1) + conds = torch.cat(positive_embeddings + + append_conds).unsqueeze(0).repeat(batch, 1, 1) + return self._set_position( + box_out.to(device), + masks.to(device), + conds.to(device)) + + def set_empty(self, latent_image_shape, device): + batch, c, h, w = latent_image_shape + masks = torch.zeros([self.max_objs], device="cpu").repeat(batch, 1) + box_out = torch.zeros([self.max_objs, 4], + device="cpu").repeat(batch, 1, 1) + conds = torch.zeros([self.max_objs, self.key_dim], + device="cpu").repeat(batch, 1, 1) + return self._set_position( + box_out.to(device), + masks.to(device), + conds.to(device)) + + def cleanup(self): + pass + + def get_models(self): + return [self] + +def load_gligen(sd): + sd_k = sd.keys() + output_list = [] + key_dim = 768 + for a in ["input_blocks", "middle_block", "output_blocks"]: + for b in range(20): + k_temp = filter(lambda k: "{}.{}.".format(a, b) + in k and ".fuser." in k, sd_k) + k_temp = map(lambda k: (k, k.split(".fuser.")[-1]), k_temp) + + n_sd = {} + for k in k_temp: + n_sd[k[1]] = sd[k[0]] + if len(n_sd) > 0: + query_dim = n_sd["linear.weight"].shape[0] + key_dim = n_sd["linear.weight"].shape[1] + + if key_dim == 768: # SD1.x + n_heads = 8 + d_head = query_dim // n_heads + else: + d_head = 64 + n_heads = query_dim // d_head + + gated = GatedSelfAttentionDense( + query_dim, key_dim, n_heads, d_head) + gated.load_state_dict(n_sd, strict=False) + output_list.append(gated) + + if "position_net.null_positive_feature" in sd_k: + in_dim = sd["position_net.null_positive_feature"].shape[0] + out_dim = sd["position_net.linears.4.weight"].shape[0] + + class WeightsLoader(torch.nn.Module): + pass + w = WeightsLoader() + w.position_net = PositionNet(in_dim, out_dim) + w.load_state_dict(sd, strict=False) + + gligen = Gligen(output_list, w.position_net, key_dim) + return gligen diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index c83387348..98dbda635 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -510,6 +510,14 @@ class BasicTransformerBlock(nn.Module): return checkpoint(self._forward, (x, context, transformer_options), self.parameters(), self.checkpoint) def _forward(self, x, context=None, transformer_options={}): + current_index = None + if "current_index" in transformer_options: + current_index = transformer_options["current_index"] + if "patches" in transformer_options: + transformer_patches = transformer_options["patches"] + else: + transformer_patches = {} + n = self.norm1(x) if "tomesd" in transformer_options: m, u = tomesd.get_functions(x, transformer_options["tomesd"]["ratio"], transformer_options["original_shape"]) @@ -518,11 +526,19 @@ class BasicTransformerBlock(nn.Module): n = self.attn1(n, context=context if self.disable_self_attn else None) x += n + if "middle_patch" in transformer_patches: + patch = transformer_patches["middle_patch"] + for p in patch: + x = p(current_index, x) + n = self.norm2(x) n = self.attn2(n, context=context) x += n x = self.ff(self.norm3(x)) + x + + if current_index is not None: + transformer_options["current_index"] += 1 return x diff --git a/comfy/ldm/modules/diffusionmodules/openaimodel.py b/comfy/ldm/modules/diffusionmodules/openaimodel.py index 8a4e8b3e1..4c69c8567 100644 --- a/comfy/ldm/modules/diffusionmodules/openaimodel.py +++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py @@ -782,6 +782,8 @@ class UNetModel(nn.Module): :return: an [N x C x ...] Tensor of outputs. """ transformer_options["original_shape"] = list(x.shape) + transformer_options["current_index"] = 0 + assert (y is not None) == ( self.num_classes is not None ), "must specify y if and only if the model is class-conditional" diff --git a/comfy/model_management.py b/comfy/model_management.py index 76455e4a2..a0d1313d2 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -176,7 +176,7 @@ def load_model_gpu(model): model_accelerated = True return current_loaded_model -def load_controlnet_gpu(models): +def load_controlnet_gpu(control_models): global current_gpu_controlnets global vram_state if vram_state == VRAMState.CPU: @@ -186,6 +186,10 @@ def load_controlnet_gpu(models): #don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after return + models = [] + for m in control_models: + models += m.get_models() + for m in current_gpu_controlnets: if m not in models: m.cpu() diff --git a/comfy/samplers.py b/comfy/samplers.py index 05af6fe88..31968e185 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -70,7 +70,21 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con control = None if 'control' in cond[1]: control = cond[1]['control'] - return (input_x, mult, conditionning, area, control) + + patches = None + if 'gligen' in cond[1]: + gligen = cond[1]['gligen'] + patches = {} + gligen_type = gligen[0] + gligen_model = gligen[1] + if gligen_type == "position": + gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device) + else: + gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device) + + patches['middle_patch'] = [gligen_patch] + + return (input_x, mult, conditionning, area, control, patches) def cond_equal_size(c1, c2): if c1 is c2: @@ -91,12 +105,21 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con def can_concat_cond(c1, c2): if c1[0].shape != c2[0].shape: return False + + #control if (c1[4] is None) != (c2[4] is None): return False if c1[4] is not None: if c1[4] is not c2[4]: return False + #patches + if (c1[5] is None) != (c2[5] is None): + return False + if (c1[5] is not None): + if c1[5] is not c2[5]: + return False + return cond_equal_size(c1[2], c2[2]) def cond_cat(c_list): @@ -166,6 +189,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con cond_or_uncond = [] area = [] control = None + patches = None for x in to_batch: o = to_run.pop(x) p = o[0] @@ -175,6 +199,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con area += [p[3]] cond_or_uncond += [o[1]] control = p[4] + patches = p[5] batch_chunks = len(cond_or_uncond) input_x = torch.cat(input_x) @@ -184,8 +209,14 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con if control is not None: c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'], len(cond_or_uncond)) + transformer_options = {} if 'transformer_options' in model_options: - c['transformer_options'] = model_options['transformer_options'] + transformer_options = model_options['transformer_options'].copy() + + if patches is not None: + transformer_options["patches"] = patches + + c['transformer_options'] = transformer_options output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks) del input_x @@ -309,8 +340,7 @@ def create_cond_with_same_area_if_none(conds, c): n = c[1].copy() conds += [[smallest[0], n]] - -def apply_control_net_to_equal_area(conds, uncond): +def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func): cond_cnets = [] cond_other = [] uncond_cnets = [] @@ -318,15 +348,15 @@ def apply_control_net_to_equal_area(conds, uncond): for t in range(len(conds)): x = conds[t] if 'area' not in x[1]: - if 'control' in x[1] and x[1]['control'] is not None: - cond_cnets.append(x[1]['control']) + if name in x[1] and x[1][name] is not None: + cond_cnets.append(x[1][name]) else: cond_other.append((x, t)) for t in range(len(uncond)): x = uncond[t] if 'area' not in x[1]: - if 'control' in x[1] and x[1]['control'] is not None: - uncond_cnets.append(x[1]['control']) + if name in x[1] and x[1][name] is not None: + uncond_cnets.append(x[1][name]) else: uncond_other.append((x, t)) @@ -336,15 +366,16 @@ def apply_control_net_to_equal_area(conds, uncond): for x in range(len(cond_cnets)): temp = uncond_other[x % len(uncond_other)] o = temp[0] - if 'control' in o[1] and o[1]['control'] is not None: + if name in o[1] and o[1][name] is not None: n = o[1].copy() - n['control'] = cond_cnets[x] + n[name] = uncond_fill_func(cond_cnets, x) uncond += [[o[0], n]] else: n = o[1].copy() - n['control'] = cond_cnets[x] + n[name] = uncond_fill_func(cond_cnets, x) uncond[temp[1]] = [o[0], n] + def encode_adm(noise_augmentor, conds, batch_size, device): for t in range(len(conds)): x = conds[t] @@ -378,6 +409,7 @@ def encode_adm(noise_augmentor, conds, batch_size, device): return conds + class KSampler: SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", @@ -466,7 +498,8 @@ class KSampler: for c in negative: create_cond_with_same_area_if_none(positive, c) - apply_control_net_to_equal_area(positive, negative) + apply_empty_x_to_equal_area(positive, negative, 'control', lambda cond_cnets, x: cond_cnets[x]) + apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x]) if self.model.model.diffusion_model.dtype == torch.float16: precision_scope = torch.autocast diff --git a/comfy/sd.py b/comfy/sd.py index 1d7774742..211acd70e 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -13,6 +13,7 @@ from .t2i_adapter import adapter from . import utils from . import clip_vision +from . import gligen def load_model_weights(model, sd, verbose=False, load_state_dict_to=[]): m, u = model.load_state_dict(sd, strict=False) @@ -378,7 +379,7 @@ class CLIP: def tokenize(self, text, return_word_ids=False): return self.tokenizer.tokenize_with_weights(text, return_word_ids) - def encode_from_tokens(self, tokens): + def encode_from_tokens(self, tokens, return_pooled=False): if self.layer_idx is not None: self.cond_stage_model.clip_layer(self.layer_idx) try: @@ -388,6 +389,10 @@ class CLIP: except Exception as e: self.patcher.unpatch_model() raise e + if return_pooled: + eos_token_index = max(range(len(tokens[0])), key=tokens[0].__getitem__) + pooled = cond[:, eos_token_index] + return cond, pooled return cond def encode(self, text): @@ -564,10 +569,10 @@ class ControlNet: c.strength = self.strength return c - def get_control_models(self): + def get_models(self): out = [] if self.previous_controlnet is not None: - out += self.previous_controlnet.get_control_models() + out += self.previous_controlnet.get_models() out.append(self.control_model) return out @@ -737,10 +742,10 @@ class T2IAdapter: del self.cond_hint self.cond_hint = None - def get_control_models(self): + def get_models(self): out = [] if self.previous_controlnet is not None: - out += self.previous_controlnet.get_control_models() + out += self.previous_controlnet.get_models() return out def load_t2i_adapter(t2i_data): @@ -787,6 +792,13 @@ def load_clip(ckpt_path, embedding_directory=None): clip.load_from_state_dict(clip_data) return clip +def load_gligen(ckpt_path): + data = utils.load_torch_file(ckpt_path) + model = gligen.load_gligen(data) + if model_management.should_use_fp16(): + model = model.half() + return model + def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=None): with open(config_path, 'r') as stream: config = yaml.safe_load(stream) diff --git a/folder_paths.py b/folder_paths.py index 61f446c96..3c4ad3711 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -26,6 +26,8 @@ folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")] folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"]) folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions) +folder_names_and_paths["gligen"] = ([os.path.join(models_dir, "gligen")], supported_pt_extensions) + folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions) folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], []) diff --git a/models/gligen/put_gligen_models_here b/models/gligen/put_gligen_models_here new file mode 100644 index 000000000..e69de29bb diff --git a/nodes.py b/nodes.py index 06b69f453..8555f272a 100644 --- a/nodes.py +++ b/nodes.py @@ -490,6 +490,51 @@ class unCLIPConditioning: c.append(n) return (c, ) +class GLIGENLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "gligen_name": (folder_paths.get_filename_list("gligen"), )}} + + RETURN_TYPES = ("GLIGEN",) + FUNCTION = "load_gligen" + + CATEGORY = "_for_testing/gligen" + + def load_gligen(self, gligen_name): + gligen_path = folder_paths.get_full_path("gligen", gligen_name) + gligen = comfy.sd.load_gligen(gligen_path) + return (gligen,) + +class GLIGENTextBoxApply: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_to": ("CONDITIONING", ), + "clip": ("CLIP", ), + "gligen_textbox_model": ("GLIGEN", ), + "text": ("STRING", {"multiline": True}), + "width": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "_for_testing/gligen" + + def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y): + c = [] + cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True) + for t in conditioning_to: + n = [t[0], t[1].copy()] + position_params = [(cond_pooled, height // 8, width // 8, y // 8, x // 8)] + prev = [] + if "gligen" in n[1]: + prev = n[1]['gligen'][2] + + n[1]['gligen'] = ("position", gligen_textbox_model, prev + position_params) + c.append(n) + return (c, ) class EmptyLatentImage: def __init__(self, device="cpu"): @@ -731,27 +776,30 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative_copy = [] control_nets = [] + def get_models(cond): + models = [] + for c in cond: + if 'control' in c[1]: + models += [c[1]['control']] + if 'gligen' in c[1]: + models += [c[1]['gligen'][1]] + return models + for p in positive: t = p[0] if t.shape[0] < noise.shape[0]: t = torch.cat([t] * noise.shape[0]) t = t.to(device) - if 'control' in p[1]: - control_nets += [p[1]['control']] positive_copy += [[t] + p[1:]] for n in negative: t = n[0] if t.shape[0] < noise.shape[0]: t = torch.cat([t] * noise.shape[0]) t = t.to(device) - if 'control' in n[1]: - control_nets += [n[1]['control']] negative_copy += [[t] + n[1:]] - control_net_models = [] - for x in control_nets: - control_net_models += x.get_control_models() - comfy.model_management.load_controlnet_gpu(control_net_models) + models = get_models(positive) + get_models(negative) + comfy.model_management.load_controlnet_gpu(models) if sampler_name in comfy.samplers.KSampler.SAMPLERS: sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) @@ -761,8 +809,8 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask) samples = samples.cpu() - for c in control_nets: - c.cleanup() + for m in models: + m.cleanup() out = latent.copy() out["samples"] = samples @@ -1128,6 +1176,9 @@ NODE_CLASS_MAPPINGS = { "VAEEncodeTiled": VAEEncodeTiled, "TomePatchModel": TomePatchModel, "unCLIPCheckpointLoader": unCLIPCheckpointLoader, + "GLIGENLoader": GLIGENLoader, + "GLIGENTextBoxApply": GLIGENTextBoxApply, + "CheckpointLoader": CheckpointLoader, "DiffusersLoader": DiffusersLoader, } From 781b724ac667e42900c331988f356a85670c0ec5 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 19 Apr 2023 11:30:18 -0400 Subject: [PATCH 17/45] Add GLIGEN model link to colab. --- notebooks/comfyui_colab.ipynb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/notebooks/comfyui_colab.ipynb b/notebooks/comfyui_colab.ipynb index c088de89c..c1982d8be 100644 --- a/notebooks/comfyui_colab.ipynb +++ b/notebooks/comfyui_colab.ipynb @@ -138,6 +138,11 @@ "# Controlnet Preprocessor nodes by Fannovel16\n", "#!cd custom_nodes && git clone https://github.com/Fannovel16/comfy_controlnet_preprocessors; cd comfy_controlnet_preprocessors && python install.py\n", "\n", + "\n", + "# GLIGEN\n", + "#!wget -c https://huggingface.co/comfyanonymous/GLIGEN_pruned_safetensors/resolve/main/gligen_sd14_textbox_pruned_fp16.safetensors -P ./models/gligen/\n", + "\n", + "\n", "# ESRGAN upscale model\n", "#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth -P ./models/upscale_models/\n", "#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x4.pth -P ./models/upscale_models/\n", From 2d546d510d1f7919bbae3ac08108e0d05e9c0bae Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 19 Apr 2023 11:47:49 -0400 Subject: [PATCH 18/45] Add gligen entry to extra_model_paths example. --- extra_model_paths.yaml.example | 1 + 1 file changed, 1 insertion(+) diff --git a/extra_model_paths.yaml.example b/extra_model_paths.yaml.example index f421f54dc..ac1ffe9d2 100644 --- a/extra_model_paths.yaml.example +++ b/extra_model_paths.yaml.example @@ -18,6 +18,7 @@ a111: #other_ui: # base_path: path/to/ui # checkpoints: models/checkpoints +# gligen: models/gligen # custom_nodes: path/custom_nodes From 96b57a9ad6447b95921b91e5f52fb3684f73514f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 19 Apr 2023 21:11:38 -0400 Subject: [PATCH 19/45] Don't pass adm to model when it doesn't support it. --- comfy/samplers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index 31968e185..19ebc97d9 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -36,8 +36,8 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con strength = cond[1]['strength'] adm_cond = None - if 'adm' in cond[1]: - adm_cond = cond[1]['adm'] + if 'adm_encoded' in cond[1]: + adm_cond = cond[1]['adm_encoded'] input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] mult = torch.ones_like(input_x) * strength @@ -405,7 +405,7 @@ def encode_adm(noise_augmentor, conds, batch_size, device): else: adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device) x[1] = x[1].copy() - x[1]["adm"] = torch.cat([adm_out] * batch_size) + x[1]["adm_encoded"] = torch.cat([adm_out] * batch_size) return conds From 94e9798a4b614627805be197aa3da415a1de7ee4 Mon Sep 17 00:00:00 2001 From: omar92 Date: Thu, 20 Apr 2023 06:19:56 +0200 Subject: [PATCH 20/45] when drag from node input or output show all possible nodes that you can connect --- web/extensions/core/slotDefaults.js | 50 ++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/web/extensions/core/slotDefaults.js b/web/extensions/core/slotDefaults.js index 0b6a0a150..3ff5fdb06 100644 --- a/web/extensions/core/slotDefaults.js +++ b/web/extensions/core/slotDefaults.js @@ -1,21 +1,49 @@ import { app } from "/scripts/app.js"; - +import { ComfyWidgets } from "/scripts/widgets.js"; // Adds defaults for quickly adding nodes with middle click on the input/output app.registerExtension({ name: "Comfy.SlotDefaults", init() { LiteGraph.middle_click_slot_add_default_node = true; - LiteGraph.slot_types_default_in = { - MODEL: "CheckpointLoaderSimple", - LATENT: "EmptyLatentImage", - VAE: "VAELoader", - }; + }, + async beforeRegisterNodeDef(nodeType, nodeData, app) { + var nodeId = nodeData.name; + var inputs = []; + //if (nodeData["input"]["optional"] != undefined) { + // inputs = Object.assign({}, nodeData["input"]["required"], nodeData["input"]["optional"]); + //} else { + inputs = nodeData["input"]["required"]; //only show required inputs to reduce the mess also not logica to create node with optional inputs + //} + for (const inputKey in inputs) { + var input = (inputs[inputKey]); + //make sure input[0] is a string + if (typeof input[0] !== "string") continue; + + // for (const slotKey in inputs[inputKey]) { + var type = input[0] + if (type in ComfyWidgets) { + var customProperties = input[1] + //console.log(customProperties) + if (!(customProperties?.forceInput)) continue; //ignore widgets that don't force input + } + + if (!(type in LiteGraph.slot_types_default_out)) { + LiteGraph.slot_types_default_out[type] = ["Reroute"]; + } + if (LiteGraph.slot_types_default_out[type].includes(nodeId)) continue; + LiteGraph.slot_types_default_out[type].push(nodeId); + // } + } + + var outputs = nodeData["output"]; + for (const key in outputs) { + var type = outputs[key]; + if (!(type in LiteGraph.slot_types_default_in)) { + LiteGraph.slot_types_default_in[type] = ["Reroute"];// ["Reroute", "Primitive"]; primitive doesn't always work :'() + } + LiteGraph.slot_types_default_in[type].push(nodeId); + } - LiteGraph.slot_types_default_out = { - LATENT: "VAEDecode", - IMAGE: "SaveImage", - CLIP: "CLIPTextEncode", - }; }, }); From 5229c1f972b4130d5d0ddc19362604c6ec57d1fd Mon Sep 17 00:00:00 2001 From: omar92 Date: Thu, 20 Apr 2023 21:13:14 +0200 Subject: [PATCH 21/45] add option on the settings to change the number of the suggestions --- web/extensions/core/slotDefaults.js | 61 ++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/web/extensions/core/slotDefaults.js b/web/extensions/core/slotDefaults.js index 3ff5fdb06..04baadc6a 100644 --- a/web/extensions/core/slotDefaults.js +++ b/web/extensions/core/slotDefaults.js @@ -4,46 +4,69 @@ import { ComfyWidgets } from "/scripts/widgets.js"; app.registerExtension({ name: "Comfy.SlotDefaults", + suggestionsNumber: null, init() { LiteGraph.middle_click_slot_add_default_node = true; + this.suggestionsNumber = app.ui.settings.addSetting({ + id: "Comfy.NodeSuggestions.number", + name: "number of nodes suggestions", + type: "slider", + attrs: { + min: 1, + max: 100, + step: 1, + }, + defaultValue: 5, + onChange: (newVal, oldVal) => { + this.setDefaults(newVal); + } + }); }, + slot_types_default_out: {}, + slot_types_default_in: {}, async beforeRegisterNodeDef(nodeType, nodeData, app) { - var nodeId = nodeData.name; + var nodeId = nodeData.name; var inputs = []; - //if (nodeData["input"]["optional"] != undefined) { - // inputs = Object.assign({}, nodeData["input"]["required"], nodeData["input"]["optional"]); - //} else { - inputs = nodeData["input"]["required"]; //only show required inputs to reduce the mess also not logica to create node with optional inputs - //} + inputs = nodeData["input"]["required"]; //only show required inputs to reduce the mess also not logical to create node with optional inputs for (const inputKey in inputs) { var input = (inputs[inputKey]); - //make sure input[0] is a string if (typeof input[0] !== "string") continue; - // for (const slotKey in inputs[inputKey]) { var type = input[0] if (type in ComfyWidgets) { var customProperties = input[1] - //console.log(customProperties) if (!(customProperties?.forceInput)) continue; //ignore widgets that don't force input } - if (!(type in LiteGraph.slot_types_default_out)) { - LiteGraph.slot_types_default_out[type] = ["Reroute"]; + if (!(type in this.slot_types_default_out)) { + this.slot_types_default_out[type] = ["Reroute"]; } - if (LiteGraph.slot_types_default_out[type].includes(nodeId)) continue; - LiteGraph.slot_types_default_out[type].push(nodeId); - // } - } + if (this.slot_types_default_out[type].includes(nodeId)) continue; + this.slot_types_default_out[type].push(nodeId); + } var outputs = nodeData["output"]; for (const key in outputs) { var type = outputs[key]; - if (!(type in LiteGraph.slot_types_default_in)) { - LiteGraph.slot_types_default_in[type] = ["Reroute"];// ["Reroute", "Primitive"]; primitive doesn't always work :'() + if (!(type in this.slot_types_default_in)) { + this.slot_types_default_in[type] = ["Reroute"];// ["Reroute", "Primitive"]; primitive doesn't always work :'() } - LiteGraph.slot_types_default_in[type].push(nodeId); - } + this.slot_types_default_in[type].push(nodeId); + } + var maxNum = this.suggestionsNumber ? this.suggestionsNumber.value : 5; + this.setDefaults(maxNum); }, + setDefaults(maxNum) { + + LiteGraph.slot_types_default_out = {}; + LiteGraph.slot_types_default_in = {}; + + for (const type in this.slot_types_default_out) { + LiteGraph.slot_types_default_out[type] = this.slot_types_default_out[type].slice(0, maxNum); + } + for (const type in this.slot_types_default_in) { + LiteGraph.slot_types_default_in[type] = this.slot_types_default_in[type].slice(0, maxNum); + } + } }); From 31e60adb2802874a5889623a83149faa32924a98 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 20 Apr 2023 17:30:10 -0400 Subject: [PATCH 22/45] Add GLIGEN example to README. --- README.md | 1 + nodes.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index be2cb8ec5..bf16006bf 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin - [ControlNet and T2I-Adapter](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/) - [Upscale Models (ESRGAN, ESRGAN variants, SwinIR, Swin2SR, etc...)](https://comfyanonymous.github.io/ComfyUI_examples/upscale_models/) - [unCLIP Models](https://comfyanonymous.github.io/ComfyUI_examples/unclip/) +- [GLIGEN](https://comfyanonymous.github.io/ComfyUI_examples/gligen/) - Starts up very fast. - Works fully offline: will never download anything. - [Config file](extra_model_paths.yaml.example) to set the search paths for models. diff --git a/nodes.py b/nodes.py index 8555f272a..48c3ee9c3 100644 --- a/nodes.py +++ b/nodes.py @@ -498,7 +498,7 @@ class GLIGENLoader: RETURN_TYPES = ("GLIGEN",) FUNCTION = "load_gligen" - CATEGORY = "_for_testing/gligen" + CATEGORY = "loaders" def load_gligen(self, gligen_name): gligen_path = folder_paths.get_full_path("gligen", gligen_name) @@ -520,7 +520,7 @@ class GLIGENTextBoxApply: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - CATEGORY = "_for_testing/gligen" + CATEGORY = "conditioning/gligen" def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y): c = [] From d2ef3465ca838e528008cb5e20b40d25079d5176 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Thu, 20 Apr 2023 18:23:51 -0600 Subject: [PATCH 23/45] Improve current word selection --- web/extensions/core/editAttention.js | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index bebc80b12..cc51a04e5 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -89,24 +89,17 @@ app.registerExtension({ end = nearestEnclosure.end; selectedText = inputField.value.substring(start, end); } else { - // Select the current word, find the start and end of the word (first space before and after) - const wordStart = inputField.value.substring(0, start).lastIndexOf(" ") + 1; - const wordEnd = inputField.value.substring(end).indexOf(" "); - // If there is no space after the word, select to the end of the string - if (wordEnd === -1) { - end = inputField.value.length; - } else { - end += wordEnd; + // Select the current word, find the start and end of the word + const delimiters = " .,\\/!?%^*;:{}=-_`~()\r\n\t"; + + while (!delimiters.includes(inputField.value[start - 1]) && start > 0) { + start--; + } + + while (!delimiters.includes(inputField.value[end]) && end < inputField.value.length) { + end++; } - start = wordStart; - // Remove all punctuation at the end and beginning of the word - while (inputField.value[start].match(/[.,\/#!$%\^&\*;:{}=\-_`~()]/)) { - start++; - } - while (inputField.value[end - 1].match(/[.,\/#!$%\^&\*;:{}=\-_`~()]/)) { - end--; - } selectedText = inputField.value.substring(start, end); if (!selectedText) return; } From 907010e0824eeab12c5948e5afa4df6d0934be9a Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 20 Apr 2023 23:58:25 -0400 Subject: [PATCH 24/45] Remove some useless code. --- comfy/samplers.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index 19ebc97d9..15527224e 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -7,23 +7,6 @@ from comfy import model_management from .ldm.models.diffusion.ddim import DDIMSampler from .ldm.modules.diffusionmodules.util import make_ddim_timesteps -class CFGDenoiser(torch.nn.Module): - def __init__(self, model): - super().__init__() - self.inner_model = model - - def forward(self, x, sigma, uncond, cond, cond_scale): - if len(uncond[0]) == len(cond[0]) and x.shape[0] * x.shape[2] * x.shape[3] < (96 * 96): #TODO check memory instead - x_in = torch.cat([x] * 2) - sigma_in = torch.cat([sigma] * 2) - cond_in = torch.cat([uncond, cond]) - uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) - else: - cond = self.inner_model(x, sigma, cond=cond) - uncond = self.inner_model(x, sigma, cond=uncond) - return uncond + (cond - uncond) * cond_scale - - #The main sampling function shared by all the samplers #Returns predicted noise def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None, model_options={}): From 98ae4bbfdee1ea9da62e3d22a3c6428032a78398 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Thu, 20 Apr 2023 23:55:20 -0600 Subject: [PATCH 25/45] Remove brackets if weight == 1 --- web/extensions/core/editAttention.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/web/extensions/core/editAttention.js b/web/extensions/core/editAttention.js index cc51a04e5..b937bb103 100644 --- a/web/extensions/core/editAttention.js +++ b/web/extensions/core/editAttention.js @@ -128,8 +128,13 @@ app.registerExtension({ // Increment the weight const weightDelta = event.key === "ArrowUp" ? delta : -delta; - const updatedText = selectedText.replace(/(.*:)(\d+(\.\d+)?)(.*)/, (match, prefix, weight, _, suffix) => { - return prefix + incrementWeight(weight, weightDelta) + suffix; + const updatedText = selectedText.replace(/\((.*):(\d+(?:\.\d+)?)\)/, (match, text, weight) => { + weight = incrementWeight(weight, weightDelta); + if (weight == 1) { + return text; + } else { + return `(${text}:${weight})`; + } }); inputField.setRangeText(updatedText, start, end, "select"); From 989acd769a6b5f3a5d6e3cd03fafbd9668c2dbdf Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 21 Apr 2023 23:43:38 -0400 Subject: [PATCH 26/45] Cleanup. --- web/extensions/core/slotDefaults.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/extensions/core/slotDefaults.js b/web/extensions/core/slotDefaults.js index 04baadc6a..3ec605900 100644 --- a/web/extensions/core/slotDefaults.js +++ b/web/extensions/core/slotDefaults.js @@ -54,7 +54,7 @@ app.registerExtension({ this.slot_types_default_in[type].push(nodeId); } - var maxNum = this.suggestionsNumber ? this.suggestionsNumber.value : 5; + var maxNum = this.suggestionsNumber.value; this.setDefaults(maxNum); }, setDefaults(maxNum) { From 6908f9c94992b32fbb96be0f6cd8c5b362d72a77 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 22 Apr 2023 14:30:39 -0400 Subject: [PATCH 27/45] This makes pytorch2.0 attention perform a bit faster. --- comfy/ldm/modules/attention.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 98dbda635..c27d032a3 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -455,11 +455,7 @@ class CrossAttentionPytorch(nn.Module): b, _, _ = q.shape q, k, v = map( - lambda t: t.unsqueeze(3) - .reshape(b, t.shape[1], self.heads, self.dim_head) - .permute(0, 2, 1, 3) - .reshape(b * self.heads, t.shape[1], self.dim_head) - .contiguous(), + lambda t: t.view(b, -1, self.heads, self.dim_head).transpose(1, 2), (q, k, v), ) @@ -468,10 +464,7 @@ class CrossAttentionPytorch(nn.Module): if exists(mask): raise NotImplementedError out = ( - out.unsqueeze(0) - .reshape(b, self.heads, out.shape[1], self.dim_head) - .permute(0, 2, 1, 3) - .reshape(b, out.shape[1], self.heads * self.dim_head) + out.transpose(1, 2).reshape(b, -1, self.heads * self.dim_head) ) return self.to_out(out) From ee030d281bbd25d385ba9ca10badb66b487cca21 Mon Sep 17 00:00:00 2001 From: Jacob Segal Date: Sat, 22 Apr 2023 16:02:26 -0700 Subject: [PATCH 28/45] Add support for multiple unique inpainting masks This enables workflows like "Inpaint at full resolution" when using batch sizes greater than 1. --- nodes.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/nodes.py b/nodes.py index 48c3ee9c3..9335d5243 100644 --- a/nodes.py +++ b/nodes.py @@ -171,24 +171,28 @@ class VAEEncodeForInpaint: def encode(self, vae, pixels, mask): x = (pixels.shape[1] // 64) * 64 y = (pixels.shape[2] // 64) * 64 - mask = torch.nn.functional.interpolate(mask[None,None,], size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")[0][0] + if len(mask.shape) < 3: + mask = mask.unsqueeze(0).unsqueeze(0) + elif len(mask.shape) < 4: + mask = mask.unsqueeze(1) + mask = torch.nn.functional.interpolate(mask, size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") pixels = pixels.clone() if pixels.shape[1] != x or pixels.shape[2] != y: pixels = pixels[:,:x,:y,:] - mask = mask[:x,:y] + mask = mask[:,:x,:y,:] #grow mask by a few pixels to keep things seamless in latent space kernel_tensor = torch.ones((1, 1, 6, 6)) - mask_erosion = torch.clamp(torch.nn.functional.conv2d((mask.round())[None], kernel_tensor, padding=3), 0, 1) - m = (1.0 - mask.round()) + mask_erosion = torch.clamp(torch.nn.functional.conv2d(mask.round(), kernel_tensor, padding=3), 0, 1) + m = (1.0 - mask.round()).squeeze(1) for i in range(3): pixels[:,:,:,i] -= 0.5 pixels[:,:,:,i] *= m pixels[:,:,:,i] += 0.5 t = vae.encode(pixels) - return ({"samples":t, "noise_mask": (mask_erosion[0][:x,:y].round())}, ) + return ({"samples":t, "noise_mask": (mask_erosion[:,:x,:y,:].round())}, ) class CheckpointLoader: @classmethod @@ -759,10 +763,15 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, if "noise_mask" in latent: noise_mask = latent['noise_mask'] - noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") + if len(noise_mask.shape) < 3: + noise_mask = noise_mask.unsqueeze(0).unsqueeze(0) + elif len(noise_mask.shape) < 4: + noise_mask = noise_mask.unsqueeze(1) + noise_mask = torch.nn.functional.interpolate(noise_mask, size=(noise.shape[2], noise.shape[3]), mode="bilinear") noise_mask = noise_mask.round() noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) - noise_mask = torch.cat([noise_mask] * noise.shape[0]) + if noise_mask.shape[0] < latent_image.shape[0]: + noise_mask = noise_mask.repeat(latent_image.shape[0] // noise_mask.shape[0], 1, 1, 1) noise_mask = noise_mask.to(device) real_model = None From c8355ed39ff39a10eb7a3d262f278dc99ad2e73b Mon Sep 17 00:00:00 2001 From: pythongosssss <125205205+pythongosssss@users.noreply.github.com> Date: Sun, 23 Apr 2023 10:31:21 +0100 Subject: [PATCH 29/45] use window.name instead of session storage - prevents duplicate stealing session id --- web/scripts/api.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/scripts/api.js b/web/scripts/api.js index 2b90c2abc..d29faa5ba 100644 --- a/web/scripts/api.js +++ b/web/scripts/api.js @@ -35,7 +35,7 @@ class ComfyApi extends EventTarget { } let opened = false; - let existingSession = sessionStorage["Comfy.SessionId"] || ""; + let existingSession = window.name; if (existingSession) { existingSession = "?clientId=" + existingSession; } @@ -75,7 +75,7 @@ class ComfyApi extends EventTarget { case "status": if (msg.data.sid) { this.clientId = msg.data.sid; - sessionStorage["Comfy.SessionId"] = this.clientId; + window.name = this.clientId; } this.dispatchEvent(new CustomEvent("status", { detail: msg.data.status })); break; From 5282f5643476ba0f55197c3ca8b72ce43525b025 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 23 Apr 2023 12:35:25 -0400 Subject: [PATCH 30/45] Implement Linear hypernetworks. Add a HypernetworkLoader node to use hypernetworks. --- comfy/ldm/modules/attention.py | 69 +++++++++++++--- comfy/model_management.py | 3 + comfy/samplers.py | 10 ++- comfy/sd.py | 23 ++++++ comfy/utils.py | 7 +- comfy_extras/nodes_hypernetwork.py | 87 +++++++++++++++++++++ folder_paths.py | 1 + models/hypernetworks/put_hypernetworks_here | 0 nodes.py | 1 + 9 files changed, 185 insertions(+), 16 deletions(-) create mode 100644 comfy_extras/nodes_hypernetwork.py create mode 100644 models/hypernetworks/put_hypernetworks_here diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index c27d032a3..ce7180d91 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -163,13 +163,17 @@ class CrossAttentionBirchSan(nn.Module): nn.Dropout(dropout) ) - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): h = self.heads query = self.to_q(x) context = default(context, x) key = self.to_k(context) - value = self.to_v(context) + if value is not None: + value = self.to_v(value) + else: + value = self.to_v(context) + del context, x query = query.unflatten(-1, (self.heads, -1)).transpose(1,2).flatten(end_dim=1) @@ -256,13 +260,17 @@ class CrossAttentionDoggettx(nn.Module): nn.Dropout(dropout) ) - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): h = self.heads q_in = self.to_q(x) context = default(context, x) k_in = self.to_k(context) - v_in = self.to_v(context) + if value is not None: + v_in = self.to_v(value) + del value + else: + v_in = self.to_v(context) del context, x q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in)) @@ -350,13 +358,17 @@ class CrossAttention(nn.Module): nn.Dropout(dropout) ) - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): h = self.heads q = self.to_q(x) context = default(context, x) k = self.to_k(context) - v = self.to_v(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) @@ -402,11 +414,15 @@ class MemoryEfficientCrossAttention(nn.Module): self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.attention_op: Optional[Any] = None - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): q = self.to_q(x) context = default(context, x) k = self.to_k(context) - v = self.to_v(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) b, _, _ = q.shape q, k, v = map( @@ -447,11 +463,15 @@ class CrossAttentionPytorch(nn.Module): self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.attention_op: Optional[Any] = None - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): q = self.to_q(x) context = default(context, x) k = self.to_k(context) - v = self.to_v(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) b, _, _ = q.shape q, k, v = map( @@ -512,11 +532,25 @@ class BasicTransformerBlock(nn.Module): transformer_patches = {} n = self.norm1(x) + if self.disable_self_attn: + context_attn1 = context + else: + context_attn1 = None + value_attn1 = None + + if "attn1_patch" in transformer_patches: + patch = transformer_patches["attn1_patch"] + if context_attn1 is None: + context_attn1 = n + value_attn1 = context_attn1 + for p in patch: + n, context_attn1, value_attn1 = p(current_index, n, context_attn1, value_attn1) + if "tomesd" in transformer_options: m, u = tomesd.get_functions(x, transformer_options["tomesd"]["ratio"], transformer_options["original_shape"]) - n = u(self.attn1(m(n), context=context if self.disable_self_attn else None)) + n = u(self.attn1(m(n), context=context_attn1, value=value_attn1)) else: - n = self.attn1(n, context=context if self.disable_self_attn else None) + n = self.attn1(n, context=context_attn1, value=value_attn1) x += n if "middle_patch" in transformer_patches: @@ -525,7 +559,16 @@ class BasicTransformerBlock(nn.Module): x = p(current_index, x) n = self.norm2(x) - n = self.attn2(n, context=context) + + context_attn2 = context + value_attn2 = None + if "attn2_patch" in transformer_patches: + patch = transformer_patches["attn2_patch"] + value_attn2 = context_attn2 + for p in patch: + n, context_attn2, value_attn2 = p(current_index, n, context_attn2, value_attn2) + + n = self.attn2(n, context=context_attn2, value=value_attn2) x += n x = self.ff(self.norm3(x)) + x diff --git a/comfy/model_management.py b/comfy/model_management.py index a0d1313d2..6e3a03530 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -133,6 +133,7 @@ def unload_model(): #never unload models from GPU on high vram if vram_state != VRAMState.HIGH_VRAM: current_loaded_model.model.cpu() + current_loaded_model.model_patches_to("cpu") current_loaded_model.unpatch_model() current_loaded_model = None @@ -156,6 +157,8 @@ def load_model_gpu(model): except Exception as e: model.unpatch_model() raise e + + model.model_patches_to(get_torch_device()) current_loaded_model = model if vram_state == VRAMState.CPU: pass diff --git a/comfy/samplers.py b/comfy/samplers.py index 15527224e..b860f25f1 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -197,7 +197,15 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con transformer_options = model_options['transformer_options'].copy() if patches is not None: - transformer_options["patches"] = patches + if "patches" in transformer_options: + cur_patches = transformer_options["patches"].copy() + for p in patches: + if p in cur_patches: + cur_patches[p] = cur_patches[p] + patches[p] + else: + cur_patches[p] = patches[p] + else: + transformer_options["patches"] = patches c['transformer_options'] = transformer_options diff --git a/comfy/sd.py b/comfy/sd.py index 211acd70e..92dbb931d 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -254,6 +254,29 @@ class ModelPatcher: def set_model_sampler_cfg_function(self, sampler_cfg_function): self.model_options["sampler_cfg_function"] = sampler_cfg_function + + def set_model_patch(self, patch, name): + to = self.model_options["transformer_options"] + if "patches" not in to: + to["patches"] = {} + to["patches"][name] = to["patches"].get(name, []) + [patch] + + def set_model_attn1_patch(self, patch): + self.set_model_patch(patch, "attn1_patch") + + def set_model_attn2_patch(self, patch): + self.set_model_patch(patch, "attn2_patch") + + def model_patches_to(self, device): + to = self.model_options["transformer_options"] + if "patches" in to: + patches = to["patches"] + for name in patches: + patch_list = patches[name] + for i in range(len(patch_list)): + if hasattr(patch_list[i], "to"): + patch_list[i] = patch_list[i].to(device) + def model_dtype(self): return self.model.diffusion_model.dtype diff --git a/comfy/utils.py b/comfy/utils.py index 0380b91dd..68f93403c 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -1,11 +1,14 @@ import torch -def load_torch_file(ckpt): +def load_torch_file(ckpt, safe_load=False): if ckpt.lower().endswith(".safetensors"): import safetensors.torch sd = safetensors.torch.load_file(ckpt, device="cpu") else: - pl_sd = torch.load(ckpt, map_location="cpu") + if safe_load: + pl_sd = torch.load(ckpt, map_location="cpu", weights_only=True) + else: + pl_sd = torch.load(ckpt, map_location="cpu") if "global_step" in pl_sd: print(f"Global Step: {pl_sd['global_step']}") if "state_dict" in pl_sd: diff --git a/comfy_extras/nodes_hypernetwork.py b/comfy_extras/nodes_hypernetwork.py new file mode 100644 index 000000000..db2f8695c --- /dev/null +++ b/comfy_extras/nodes_hypernetwork.py @@ -0,0 +1,87 @@ +import comfy.utils +import folder_paths +import torch + +def load_hypernetwork_patch(path, strength): + sd = comfy.utils.load_torch_file(path, safe_load=True) + activation_func = sd.get('activation_func', 'linear') + is_layer_norm = sd.get('is_layer_norm', False) + use_dropout = sd.get('use_dropout', False) + activate_output = sd.get('activate_output', False) + last_layer_dropout = sd.get('last_layer_dropout', False) + + if activation_func != 'linear' or is_layer_norm != False or use_dropout != False or activate_output != False or last_layer_dropout != False: + print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) + return None + + out = {} + + for d in sd: + try: + dim = int(d) + except: + continue + + output = [] + for index in [0, 1]: + attn_weights = sd[dim][index] + keys = attn_weights.keys() + + linears = filter(lambda a: a.endswith(".weight"), keys) + linears = sorted(list(map(lambda a: a[:-len(".weight")], linears))) + layers = [] + + for lin_name in linears: + lin_weight = attn_weights['{}.weight'.format(lin_name)] + lin_bias = attn_weights['{}.bias'.format(lin_name)] + layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) + layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) + layers += [layer] + + output.append(torch.nn.Sequential(*layers)) + out[dim] = torch.nn.ModuleList(output) + + class hypernetwork_patch: + def __init__(self, hypernet, strength): + self.hypernet = hypernet + self.strength = strength + def __call__(self, current_index, q, k, v): + dim = k.shape[-1] + if dim in self.hypernet: + hn = self.hypernet[dim] + k = k + hn[0](k) * self.strength + v = v + hn[1](v) * self.strength + + return q, k, v + + def to(self, device): + for d in self.hypernet.keys(): + self.hypernet[d] = self.hypernet[d].to(device) + return self + + return hypernetwork_patch(out, strength) + +class HypernetworkLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "hypernetwork_name": (folder_paths.get_filename_list("hypernetworks"), ), + "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "load_hypernetwork" + + CATEGORY = "_for_testing" + + def load_hypernetwork(self, model, hypernetwork_name, strength): + hypernetwork_path = folder_paths.get_full_path("hypernetworks", hypernetwork_name) + model_hypernetwork = model.clone() + patch = load_hypernetwork_patch(hypernetwork_path, strength) + if patch is not None: + model_hypernetwork.set_model_attn1_patch(patch) + model_hypernetwork.set_model_attn2_patch(patch) + return (model_hypernetwork,) + +NODE_CLASS_MAPPINGS = { + "HypernetworkLoader": HypernetworkLoader +} diff --git a/folder_paths.py b/folder_paths.py index 3c4ad3711..bb0d65524 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -32,6 +32,7 @@ folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_m folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], []) +folder_names_and_paths["hypernetworks"] = ([os.path.join(models_dir, "hypernetworks")], supported_pt_extensions) output_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output") temp_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp") diff --git a/models/hypernetworks/put_hypernetworks_here b/models/hypernetworks/put_hypernetworks_here new file mode 100644 index 000000000..e69de29bb diff --git a/nodes.py b/nodes.py index 48c3ee9c3..6ca73fa0c 100644 --- a/nodes.py +++ b/nodes.py @@ -1268,6 +1268,7 @@ def load_custom_nodes(): def init_custom_nodes(): load_custom_nodes() + load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_hypernetwork.py")) load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_upscale_model.py")) load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_post_processing.py")) load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_mask.py")) From 2a09e2aa27620c492f694b66cc10c5f41b101c12 Mon Sep 17 00:00:00 2001 From: BlenderNeko <126974546+BlenderNeko@users.noreply.github.com> Date: Sun, 23 Apr 2023 20:02:08 +0200 Subject: [PATCH 31/45] refactor/split various bits of code for sampling --- comfy/sample.py | 62 +++++++++++++++++++++++++++++++++++++++++++++ comfy/samplers.py | 64 +++++++++++++++++++++++++++-------------------- nodes.py | 60 +++++++------------------------------------- 3 files changed, 108 insertions(+), 78 deletions(-) create mode 100644 comfy/sample.py diff --git a/comfy/sample.py b/comfy/sample.py new file mode 100644 index 000000000..ede89890b --- /dev/null +++ b/comfy/sample.py @@ -0,0 +1,62 @@ +import torch +import comfy.model_management + + +def prepare_noise(latent, seed, disable_noise): + latent_image = latent["samples"] + if disable_noise: + noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") + else: + batch_index = 0 + if "batch_index" in latent: + batch_index = latent["batch_index"] + + generator = torch.manual_seed(seed) + for i in range(batch_index): + noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + return noise + +def create_mask(latent, noise): + noise_mask = None + device = comfy.model_management.get_torch_device() + if "noise_mask" in latent: + noise_mask = latent['noise_mask'] + noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") + noise_mask = noise_mask.round() + noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) + noise_mask = torch.cat([noise_mask] * noise.shape[0]) + noise_mask = noise_mask.to(device) + return noise_mask + +def broadcast_cond(cond, noise): + device = comfy.model_management.get_torch_device() + copy = [] + for p in cond: + t = p[0] + if t.shape[0] < noise.shape[0]: + t = torch.cat([t] * noise.shape[0]) + t = t.to(device) + copy += [[t] + p[1:]] + return copy + +def load_c_nets(positive, negative): + def get_models(cond): + models = [] + for c in cond: + if 'control' in c[1]: + models += [c[1]['control']] + if 'gligen' in c[1]: + models += [c[1]['gligen'][1]] + return models + + return get_models(positive) + get_models(negative) + +def load_additional_models(positive, negative): + models = load_c_nets(positive, negative) + comfy.model_management.load_controlnet_gpu(models) + return models + +def cleanup_additional_models(models): + for m in models: + m.cleanup() \ No newline at end of file diff --git a/comfy/samplers.py b/comfy/samplers.py index 15527224e..541a8db8d 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -392,6 +392,38 @@ def encode_adm(noise_augmentor, conds, batch_size, device): return conds +def calculate_sigmas(model, steps, scheduler, sampler): + """ + Returns a tensor containing the sigmas corresponding to the given model, number of steps, scheduler type and sample technique + """ + if not (isinstance(model, CompVisVDenoiser) or isinstance(model, k_diffusion_external.CompVisDenoiser)): + model = CFGNoisePredictor(model) + if model.inner_model.parameterization == "v": + model = CompVisVDenoiser(model, quantize=True) + else: + model = k_diffusion_external.CompVisDenoiser(model, quantize=True) + + sigmas = None + + discard_penultimate_sigma = False + if sampler in ['dpm_2', 'dpm_2_ancestral']: + steps += 1 + discard_penultimate_sigma = True + + if scheduler == "karras": + sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=float(model.sigma_min), sigma_max=float(model.sigma_max)) + elif scheduler == "normal": + sigmas = model.get_sigmas(steps) + elif scheduler == "simple": + sigmas = simple_scheduler(model, steps) + elif scheduler == "ddim_uniform": + sigmas = ddim_scheduler(model, steps) + else: + print("error invalid scheduler", scheduler) + + if discard_penultimate_sigma: + sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) + return sigmas class KSampler: SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"] @@ -421,41 +453,19 @@ class KSampler: self.denoise = denoise self.model_options = model_options - def _calculate_sigmas(self, steps): - sigmas = None - - discard_penultimate_sigma = False - if self.sampler in ['dpm_2', 'dpm_2_ancestral']: - steps += 1 - discard_penultimate_sigma = True - - if self.scheduler == "karras": - sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max, device=self.device) - elif self.scheduler == "normal": - sigmas = self.model_wrap.get_sigmas(steps).to(self.device) - elif self.scheduler == "simple": - sigmas = simple_scheduler(self.model_wrap, steps).to(self.device) - elif self.scheduler == "ddim_uniform": - sigmas = ddim_scheduler(self.model_wrap, steps).to(self.device) - else: - print("error invalid scheduler", self.scheduler) - - if discard_penultimate_sigma: - sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) - return sigmas - def set_steps(self, steps, denoise=None): self.steps = steps if denoise is None or denoise > 0.9999: - self.sigmas = self._calculate_sigmas(steps) + self.sigmas = calculate_sigmas(self.model_wrap, steps, self.scheduler, self.sampler).to(self.device) else: new_steps = int(steps/denoise) - sigmas = self._calculate_sigmas(new_steps) + sigmas = calculate_sigmas(self.model_wrap, new_steps, self.scheduler, self.sampler).to(self.device) self.sigmas = sigmas[-(steps + 1):] - def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None): - sigmas = self.sigmas + def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None): + if sigmas is None: + sigmas = self.sigmas sigma_min = self.sigma_min if last_step is not None and last_step < (len(sigmas) - 1): diff --git a/nodes.py b/nodes.py index 48c3ee9c3..601661864 100644 --- a/nodes.py +++ b/nodes.py @@ -16,6 +16,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "co import comfy.diffusers_convert import comfy.samplers +import comfy.sample import comfy.sd import comfy.utils @@ -739,31 +740,12 @@ class SetLatentNoiseMask: s["noise_mask"] = mask return (s,) - def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False): - latent_image = latent["samples"] - noise_mask = None device = comfy.model_management.get_torch_device() + latent_image = latent["samples"] - if disable_noise: - noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") - else: - batch_index = 0 - if "batch_index" in latent: - batch_index = latent["batch_index"] - - generator = torch.manual_seed(seed) - for i in range(batch_index): - noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") - noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") - - if "noise_mask" in latent: - noise_mask = latent['noise_mask'] - noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") - noise_mask = noise_mask.round() - noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) - noise_mask = torch.cat([noise_mask] * noise.shape[0]) - noise_mask = noise_mask.to(device) + noise = comfy.sample.prepare_noise(latent, seed, disable_noise) + noise_mask = comfy.sample.create_mask(latent, noise) real_model = None comfy.model_management.load_model_gpu(model) @@ -772,34 +754,10 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, noise = noise.to(device) latent_image = latent_image.to(device) - positive_copy = [] - negative_copy = [] + positive_copy = comfy.sample.broadcast_cond(positive, noise) + negative_copy = comfy.sample.broadcast_cond(negative, noise) - control_nets = [] - def get_models(cond): - models = [] - for c in cond: - if 'control' in c[1]: - models += [c[1]['control']] - if 'gligen' in c[1]: - models += [c[1]['gligen'][1]] - return models - - for p in positive: - t = p[0] - if t.shape[0] < noise.shape[0]: - t = torch.cat([t] * noise.shape[0]) - t = t.to(device) - positive_copy += [[t] + p[1:]] - for n in negative: - t = n[0] - if t.shape[0] < noise.shape[0]: - t = torch.cat([t] * noise.shape[0]) - t = t.to(device) - negative_copy += [[t] + n[1:]] - - models = get_models(positive) + get_models(negative) - comfy.model_management.load_controlnet_gpu(models) + models = comfy.sample.load_additional_models(positive, negative) if sampler_name in comfy.samplers.KSampler.SAMPLERS: sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) @@ -809,8 +767,8 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask) samples = samples.cpu() - for m in models: - m.cleanup() + + comfy.sample.cleanup_additional_models(models) out = latent.copy() out["samples"] = samples From 5818539743bd390a282a19d7e480177c31bc222b Mon Sep 17 00:00:00 2001 From: BlenderNeko <126974546+BlenderNeko@users.noreply.github.com> Date: Sun, 23 Apr 2023 20:09:09 +0200 Subject: [PATCH 32/45] add docstrings --- comfy/sample.py | 25 ++++++++++++++----------- nodes.py | 6 +++++- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/comfy/sample.py b/comfy/sample.py index ede89890b..981781b53 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -2,22 +2,21 @@ import torch import comfy.model_management -def prepare_noise(latent, seed, disable_noise): +def prepare_noise(latent, seed): + """creates random noise given a LATENT and a seed""" latent_image = latent["samples"] - if disable_noise: - noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") - else: - batch_index = 0 - if "batch_index" in latent: - batch_index = latent["batch_index"] + batch_index = 0 + if "batch_index" in latent: + batch_index = latent["batch_index"] - generator = torch.manual_seed(seed) - for i in range(batch_index): - noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") - noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + generator = torch.manual_seed(seed) + for i in range(batch_index): + noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") return noise def create_mask(latent, noise): + """creates a mask for a given LATENT and noise""" noise_mask = None device = comfy.model_management.get_torch_device() if "noise_mask" in latent: @@ -30,6 +29,7 @@ def create_mask(latent, noise): return noise_mask def broadcast_cond(cond, noise): + """broadcasts conditioning to the noise batch size""" device = comfy.model_management.get_torch_device() copy = [] for p in cond: @@ -41,6 +41,7 @@ def broadcast_cond(cond, noise): return copy def load_c_nets(positive, negative): + """loads control nets in positive and negative conditioning""" def get_models(cond): models = [] for c in cond: @@ -53,10 +54,12 @@ def load_c_nets(positive, negative): return get_models(positive) + get_models(negative) def load_additional_models(positive, negative): + """loads additional models in positive and negative conditioning""" models = load_c_nets(positive, negative) comfy.model_management.load_controlnet_gpu(models) return models def cleanup_additional_models(models): + """cleanup additional models that were loaded""" for m in models: m.cleanup() \ No newline at end of file diff --git a/nodes.py b/nodes.py index a70668fd7..b8c6d350f 100644 --- a/nodes.py +++ b/nodes.py @@ -744,7 +744,11 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, device = comfy.model_management.get_torch_device() latent_image = latent["samples"] - noise = comfy.sample.prepare_noise(latent, seed, disable_noise) + if disable_noise: + noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") + else: + noise = comfy.sample.prepare_noise(latent, seed) + noise_mask = comfy.sample.create_mask(latent, noise) real_model = None From f7a821881476cbd52a513877a9ffe35e6702b850 Mon Sep 17 00:00:00 2001 From: ltdrdata <128333288+ltdrdata@users.noreply.github.com> Date: Mon, 24 Apr 2023 04:58:55 +0900 Subject: [PATCH 33/45] Add clipspace feature. (#541) * Add clipspace feature. * feat: copy content to clipspace * feat: paste content from clipspace Extend validation to allow for validating annotated_path in addition to other parameters. Add support for annotated_filepath in folder_paths function. Generalize the '/upload/image' API to allow for uploading images to the 'input', 'temp', or 'output' directories. * rename contentClipboard -> clipspace * Do deep copy for imgs on copy to clipspace. * add original_imgs into clipspace * Preserve the original image when 'imgs' are modified * robust patch & refactoring folder_paths about annotated_filepath * Only show the Paste menu if the ComfyApp.clipspace is not empty * instant refresh on paste force triggering 'changed' on paste action * subfolder fix on paste logic attach subfolder if subfolder isn't empty --------- Co-authored-by: Lt.Dr.Data --- execution.py | 8 ++++- folder_paths.py | 40 ++++++++++++++++++++++ nodes.py | 8 ++--- server.py | 15 ++++++--- web/scripts/app.js | 83 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 145 insertions(+), 9 deletions(-) diff --git a/execution.py b/execution.py index 73be6db03..b062deeb1 100644 --- a/execution.py +++ b/execution.py @@ -11,6 +11,7 @@ import torch import nodes import comfy.model_management +import folder_paths def get_input_data(inputs, class_def, unique_id, outputs={}, prompt={}, extra_data={}): valid_inputs = class_def.INPUT_TYPES() @@ -250,7 +251,12 @@ def validate_inputs(prompt, item): return (False, "Value bigger than max. {}, {}".format(class_type, x)) if isinstance(type_input, list): - if val not in type_input: + is_annotated_path = val.endswith("[temp]") or val.endswith("[input]") or val.endswith("[output]") + if is_annotated_path: + if not folder_paths.exists_annotated_filepath(val): + return (False, "Invalid file path. {}, {}: {}".format(class_type, x, val)) + + elif val not in type_input: return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input)) return (True, "") diff --git a/folder_paths.py b/folder_paths.py index bb0d65524..99a016695 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -69,6 +69,46 @@ def get_directory_by_type(type_name): return None +# determine base_dir rely on annotation if name is 'filename.ext [annotation]' format +# otherwise use default_path as base_dir +def touch_annotated_filepath(name): + if name.endswith("[output]"): + base_dir = get_output_directory() + name = name[:-9] + elif name.endswith("[input]"): + base_dir = get_input_directory() + name = name[:-8] + elif name.endswith("[temp]"): + base_dir = get_temp_directory() + name = name[:-7] + else: + return name, None + + return name, base_dir + + +def get_annotated_filepath(name, default_dir=None): + name, base_dir = touch_annotated_filepath(name) + + if base_dir is None: + if default_dir is not None: + base_dir = default_dir + else: + base_dir = get_input_directory() # fallback path + + return os.path.join(base_dir, name) + + +def exists_annotated_filepath(name): + name, base_dir = touch_annotated_filepath(name) + + if base_dir is None: + base_dir = get_input_directory() # fallback path + + filepath = os.path.join(base_dir, name) + return os.path.exists(filepath) + + def add_model_folder_path(folder_name, full_folder_path): global folder_names_and_paths if folder_name in folder_names_and_paths: diff --git a/nodes.py b/nodes.py index 6ca73fa0c..b8b6280d6 100644 --- a/nodes.py +++ b/nodes.py @@ -975,7 +975,7 @@ class LoadImage: FUNCTION = "load_image" def load_image(self, image): input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image, input_dir) i = Image.open(image_path) image = i.convert("RGB") image = np.array(image).astype(np.float32) / 255.0 @@ -990,7 +990,7 @@ class LoadImage: @classmethod def IS_CHANGED(s, image): input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image, input_dir) m = hashlib.sha256() with open(image_path, 'rb') as f: m.update(f.read()) @@ -1011,7 +1011,7 @@ class LoadImageMask: FUNCTION = "load_image" def load_image(self, image, channel): input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image, input_dir) i = Image.open(image_path) if i.getbands() != ("R", "G", "B", "A"): i = i.convert("RGBA") @@ -1029,7 +1029,7 @@ class LoadImageMask: @classmethod def IS_CHANGED(s, image, channel): input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image, input_dir) m = hashlib.sha256() with open(image_path, 'rb') as f: m.update(f.read()) diff --git a/server.py b/server.py index b5403670f..1c5c17916 100644 --- a/server.py +++ b/server.py @@ -112,13 +112,20 @@ class PromptServer(): @routes.post("/upload/image") async def upload_image(request): - upload_dir = folder_paths.get_input_directory() + post = await request.post() + image = post.get("image") + + if post.get("type") is None: + upload_dir = folder_paths.get_input_directory() + elif post.get("type") == "input": + upload_dir = folder_paths.get_input_directory() + elif post.get("type") == "temp": + upload_dir = folder_paths.get_temp_directory() + elif post.get("type") == "output": + upload_dir = folder_paths.get_output_directory() if not os.path.exists(upload_dir): os.makedirs(upload_dir) - - post = await request.post() - image = post.get("image") if image and image.file: filename = image.filename diff --git a/web/scripts/app.js b/web/scripts/app.js index f158f3457..b3e88d46f 100644 --- a/web/scripts/app.js +++ b/web/scripts/app.js @@ -20,6 +20,12 @@ export class ComfyApp { */ #processingQueue = false; + /** + * Content Clipboard + * @type {serialized node object} + */ + static clipspace = null; + constructor() { this.ui = new ComfyUI(this); @@ -130,6 +136,83 @@ export class ComfyApp { ); } } + + options.push( + { + content: "Copy (Clipspace)", + callback: (obj) => { + var widgets = null; + if(this.widgets) { + widgets = this.widgets.map(({ type, name, value }) => ({ type, name, value })); + } + + let img = new Image(); + var imgs = undefined; + if(this.imgs != undefined) { + img.src = this.imgs[0].src; + imgs = [img]; + } + + ComfyApp.clipspace = { + 'widgets': widgets, + 'imgs': imgs, + 'original_imgs': imgs, + 'images': this.images + }; + } + }); + + if(ComfyApp.clipspace != null) { + options.push( + { + content: "Paste (Clipspace)", + callback: () => { + if(ComfyApp.clipspace != null) { + if(ComfyApp.clipspace.widgets != null && this.widgets != null) { + ComfyApp.clipspace.widgets.forEach(({ type, name, value }) => { + const prop = Object.values(this.widgets).find(obj => obj.type === type && obj.name === name); + if (prop) { + prop.value = value; + } + }); + } + + // image paste + if(ComfyApp.clipspace.imgs != undefined && this.imgs != undefined && this.widgets != null) { + var filename = ""; + if(this.images && ComfyApp.clipspace.images) { + this.images = ComfyApp.clipspace.images; + } + + if(ComfyApp.clipspace.images != undefined) { + const clip_image = ComfyApp.clipspace.images[0]; + if(clip_image.subfolder != '') + filename = `${clip_image.subfolder}/`; + filename += `${clip_image.filename} [${clip_image.type}]`; + } + else if(ComfyApp.clipspace.widgets != undefined) { + const index_in_clip = ComfyApp.clipspace.widgets.findIndex(obj => obj.name === 'image'); + if(index_in_clip >= 0) { + filename = `${ComfyApp.clipspace.widgets[index_in_clip].value}`; + } + } + + const index = this.widgets.findIndex(obj => obj.name === 'image'); + if(index >= 0 && filename != "" && ComfyApp.clipspace.imgs != undefined) { + this.imgs = ComfyApp.clipspace.imgs; + + this.widgets[index].value = filename; + if(this.widgets_values != undefined) { + this.widgets_values[index] = filename; + } + } + } + this.trigger('changed'); + } + } + } + ); + } }; } From ccad603b2e6862a4a719bc34dc6bd32e65a539ad Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 23 Apr 2023 16:03:26 -0400 Subject: [PATCH 34/45] Add a way for nodes to validate their own inputs. --- execution.py | 21 +++++++++++---------- folder_paths.py | 6 +++--- nodes.py | 32 +++++++++++++++++++++++--------- web/scripts/app.js | 2 +- 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/execution.py b/execution.py index b062deeb1..115efcbda 100644 --- a/execution.py +++ b/execution.py @@ -11,7 +11,6 @@ import torch import nodes import comfy.model_management -import folder_paths def get_input_data(inputs, class_def, unique_id, outputs={}, prompt={}, extra_data={}): valid_inputs = class_def.INPUT_TYPES() @@ -250,14 +249,15 @@ def validate_inputs(prompt, item): if "max" in info[1] and val > info[1]["max"]: return (False, "Value bigger than max. {}, {}".format(class_type, x)) - if isinstance(type_input, list): - is_annotated_path = val.endswith("[temp]") or val.endswith("[input]") or val.endswith("[output]") - if is_annotated_path: - if not folder_paths.exists_annotated_filepath(val): - return (False, "Invalid file path. {}, {}: {}".format(class_type, x, val)) - - elif val not in type_input: - return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input)) + if hasattr(obj_class, "VALIDATE_INPUTS"): + input_data_all = get_input_data(inputs, obj_class, unique_id) + ret = obj_class.VALIDATE_INPUTS(**input_data_all) + if ret != True: + return (False, "{}, {}".format(class_type, ret)) + else: + if isinstance(type_input, list): + if val not in type_input: + return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input)) return (True, "") def validate_prompt(prompt): @@ -279,7 +279,8 @@ def validate_prompt(prompt): m = validate_inputs(prompt, o) valid = m[0] reason = m[1] - except: + except Exception as e: + print(traceback.format_exc()) valid = False reason = "Parsing error" diff --git a/folder_paths.py b/folder_paths.py index 99a016695..e5b89492c 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -71,7 +71,7 @@ def get_directory_by_type(type_name): # determine base_dir rely on annotation if name is 'filename.ext [annotation]' format # otherwise use default_path as base_dir -def touch_annotated_filepath(name): +def annotated_filepath(name): if name.endswith("[output]"): base_dir = get_output_directory() name = name[:-9] @@ -88,7 +88,7 @@ def touch_annotated_filepath(name): def get_annotated_filepath(name, default_dir=None): - name, base_dir = touch_annotated_filepath(name) + name, base_dir = annotated_filepath(name) if base_dir is None: if default_dir is not None: @@ -100,7 +100,7 @@ def get_annotated_filepath(name, default_dir=None): def exists_annotated_filepath(name): - name, base_dir = touch_annotated_filepath(name) + name, base_dir = annotated_filepath(name) if base_dir is None: base_dir = get_input_directory() # fallback path diff --git a/nodes.py b/nodes.py index b8b6280d6..d1133d1d8 100644 --- a/nodes.py +++ b/nodes.py @@ -974,8 +974,7 @@ class LoadImage: RETURN_TYPES = ("IMAGE", "MASK") FUNCTION = "load_image" def load_image(self, image): - input_dir = folder_paths.get_input_directory() - image_path = folder_paths.get_annotated_filepath(image, input_dir) + image_path = folder_paths.get_annotated_filepath(image) i = Image.open(image_path) image = i.convert("RGB") image = np.array(image).astype(np.float32) / 255.0 @@ -989,20 +988,27 @@ class LoadImage: @classmethod def IS_CHANGED(s, image): - input_dir = folder_paths.get_input_directory() - image_path = folder_paths.get_annotated_filepath(image, input_dir) + image_path = folder_paths.get_annotated_filepath(image) m = hashlib.sha256() with open(image_path, 'rb') as f: m.update(f.read()) return m.digest().hex() + @classmethod + def VALIDATE_INPUTS(s, image): + if not folder_paths.exists_annotated_filepath(image): + return "Invalid image file: {}".format(image) + + return True + class LoadImageMask: + _color_channels = ["alpha", "red", "green", "blue"] @classmethod def INPUT_TYPES(s): input_dir = folder_paths.get_input_directory() return {"required": {"image": (sorted(os.listdir(input_dir)), ), - "channel": (["alpha", "red", "green", "blue"], ),} + "channel": (s._color_channels, ),} } CATEGORY = "mask" @@ -1010,8 +1016,7 @@ class LoadImageMask: RETURN_TYPES = ("MASK",) FUNCTION = "load_image" def load_image(self, image, channel): - input_dir = folder_paths.get_input_directory() - image_path = folder_paths.get_annotated_filepath(image, input_dir) + image_path = folder_paths.get_annotated_filepath(image) i = Image.open(image_path) if i.getbands() != ("R", "G", "B", "A"): i = i.convert("RGBA") @@ -1028,13 +1033,22 @@ class LoadImageMask: @classmethod def IS_CHANGED(s, image, channel): - input_dir = folder_paths.get_input_directory() - image_path = folder_paths.get_annotated_filepath(image, input_dir) + image_path = folder_paths.get_annotated_filepath(image) m = hashlib.sha256() with open(image_path, 'rb') as f: m.update(f.read()) return m.digest().hex() + @classmethod + def VALIDATE_INPUTS(s, image, channel): + if not folder_paths.exists_annotated_filepath(image): + return "Invalid image file: {}".format(image) + + if channel not in s._color_channels: + return "Invalid color channel: {}".format(channel) + + return True + class ImageScale: upscale_methods = ["nearest-exact", "bilinear", "area"] crop_methods = ["disabled", "center"] diff --git a/web/scripts/app.js b/web/scripts/app.js index b3e88d46f..a161bf40e 100644 --- a/web/scripts/app.js +++ b/web/scripts/app.js @@ -172,7 +172,7 @@ export class ComfyApp { ComfyApp.clipspace.widgets.forEach(({ type, name, value }) => { const prop = Object.values(this.widgets).find(obj => obj.type === type && obj.name === name); if (prop) { - prop.value = value; + prop.callback(value); } }); } From 0ac319fd81bcecea2aa35743da28088832e44707 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 23 Apr 2023 22:44:38 -0400 Subject: [PATCH 35/45] Don't delete all outputs when execution gets interrupted. --- execution.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/execution.py b/execution.py index 115efcbda..31a208e78 100644 --- a/execution.py +++ b/execution.py @@ -40,15 +40,13 @@ def get_input_data(inputs, class_def, unique_id, outputs={}, prompt={}, extra_da input_data_all[x] = unique_id return input_data_all -def recursive_execute(server, prompt, outputs, current_item, extra_data={}): +def recursive_execute(server, prompt, outputs, current_item, extra_data, executed): unique_id = current_item inputs = prompt[unique_id]['inputs'] class_type = prompt[unique_id]['class_type'] class_def = nodes.NODE_CLASS_MAPPINGS[class_type] if unique_id in outputs: - return [] - - executed = [] + return for x in inputs: input_data = inputs[x] @@ -57,7 +55,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}): input_unique_id = input_data[0] output_index = input_data[1] if input_unique_id not in outputs: - executed += recursive_execute(server, prompt, outputs, input_unique_id, extra_data) + recursive_execute(server, prompt, outputs, input_unique_id, extra_data, executed) input_data_all = get_input_data(inputs, class_def, unique_id, outputs, prompt, extra_data) if server.client_id is not None: @@ -72,7 +70,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}): server.send_sync("executed", { "node": unique_id, "output": outputs[unique_id]["ui"] }, server.client_id) if "result" in outputs[unique_id]: outputs[unique_id] = outputs[unique_id]["result"] - return executed + [unique_id] + executed.add(unique_id) def recursive_will_execute(prompt, outputs, current_item): unique_id = current_item @@ -158,7 +156,7 @@ class PromptExecutor: recursive_output_delete_if_changed(prompt, self.old_prompt, self.outputs, x) current_outputs = set(self.outputs.keys()) - executed = [] + executed = set() try: to_execute = [] for x in prompt: @@ -181,12 +179,12 @@ class PromptExecutor: except: valid = False if valid: - executed += recursive_execute(self.server, prompt, self.outputs, x, extra_data) + recursive_execute(self.server, prompt, self.outputs, x, extra_data, executed) except Exception as e: print(traceback.format_exc()) to_delete = [] for o in self.outputs: - if o not in current_outputs: + if (o not in current_outputs) and (o not in executed): to_delete += [o] if o in self.old_prompt: d = self.old_prompt.pop(o) @@ -194,11 +192,9 @@ class PromptExecutor: for o in to_delete: d = self.outputs.pop(o) del d - else: - executed = set(executed) + finally: for x in executed: self.old_prompt[x] = copy.deepcopy(prompt[x]) - finally: self.server.last_node_id = None if self.server.client_id is not None: self.server.send_sync("executing", { "node": None }, self.server.client_id) From f1b87f50fa9c274f2dd9dbe24b082aa83ef0b028 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 24 Apr 2023 01:50:56 -0400 Subject: [PATCH 36/45] Add hypernetworks path config to extra_model_paths.yaml.example --- extra_model_paths.yaml.example | 1 + 1 file changed, 1 insertion(+) diff --git a/extra_model_paths.yaml.example b/extra_model_paths.yaml.example index ac1ffe9d2..fa5418a68 100644 --- a/extra_model_paths.yaml.example +++ b/extra_model_paths.yaml.example @@ -13,6 +13,7 @@ a111: models/ESRGAN models/SwinIR embeddings: embeddings + hypernetworks: models/hypernetworks controlnet: models/ControlNet #other_ui: From 4e345b31f692d5fb89009bf3352c922c2abe30e2 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 24 Apr 2023 02:36:06 -0400 Subject: [PATCH 37/45] Support all known hypernetworks. --- comfy_extras/nodes_hypernetwork.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/comfy_extras/nodes_hypernetwork.py b/comfy_extras/nodes_hypernetwork.py index db2f8695c..c08c2c811 100644 --- a/comfy_extras/nodes_hypernetwork.py +++ b/comfy_extras/nodes_hypernetwork.py @@ -10,7 +10,17 @@ def load_hypernetwork_patch(path, strength): activate_output = sd.get('activate_output', False) last_layer_dropout = sd.get('last_layer_dropout', False) - if activation_func != 'linear' or is_layer_norm != False or use_dropout != False or activate_output != False or last_layer_dropout != False: + valid_activation = { + "linear": torch.nn.Identity, + "relu": torch.nn.ReLU, + "leakyrelu": torch.nn.LeakyReLU, + "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish, + "tanh": torch.nn.Tanh, + "sigmoid": torch.nn.Sigmoid, + } + + if activation_func not in valid_activation: print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) return None @@ -28,15 +38,27 @@ def load_hypernetwork_patch(path, strength): keys = attn_weights.keys() linears = filter(lambda a: a.endswith(".weight"), keys) - linears = sorted(list(map(lambda a: a[:-len(".weight")], linears))) + linears = list(map(lambda a: a[:-len(".weight")], linears)) layers = [] - for lin_name in linears: + for i in range(len(linears)): + lin_name = linears[i] + last_layer = (i == (len(linears) - 1)) + penultimate_layer = (i == (len(linears) - 2)) + lin_weight = attn_weights['{}.weight'.format(lin_name)] lin_bias = attn_weights['{}.bias'.format(lin_name)] layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) - layers += [layer] + layers.append(layer) + if activation_func != "linear": + if (not last_layer) or (activate_output): + layers.append(valid_activation[activation_func]()) + if is_layer_norm: + layers.append(torch.nn.LayerNorm(lin_weight.shape[0])) + if use_dropout: + if (not last_layer) and (not penultimate_layer or last_layer_dropout): + layers.append(torch.nn.Dropout(p=0.3)) output.append(torch.nn.Sequential(*layers)) out[dim] = torch.nn.ModuleList(output) From 463bde66a1d22b02858ac6f148d7fa3e6d9c4322 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 24 Apr 2023 03:08:51 -0400 Subject: [PATCH 38/45] Add hypernetwork example link to readme. Move hypernetwork loader node to loaders. --- README.md | 1 + comfy_extras/nodes_hypernetwork.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bf16006bf..5b6346a67 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin - Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models. - Embeddings/Textual inversion - [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/) +- [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/) - Loading full workflows (with seeds) from generated PNG files. - Saving/Loading workflows as Json files. - Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones. diff --git a/comfy_extras/nodes_hypernetwork.py b/comfy_extras/nodes_hypernetwork.py index c08c2c811..0c7250e43 100644 --- a/comfy_extras/nodes_hypernetwork.py +++ b/comfy_extras/nodes_hypernetwork.py @@ -93,7 +93,7 @@ class HypernetworkLoader: RETURN_TYPES = ("MODEL",) FUNCTION = "load_hypernetwork" - CATEGORY = "_for_testing" + CATEGORY = "loaders" def load_hypernetwork(self, model, hypernetwork_name, strength): hypernetwork_path = folder_paths.get_full_path("hypernetworks", hypernetwork_name) From d9b1595f8552384dd08374d34c4d4127e0b1a4e6 Mon Sep 17 00:00:00 2001 From: BlenderNeko <126974546+BlenderNeko@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:53:10 +0200 Subject: [PATCH 39/45] made sample functions more explicit --- comfy/sample.py | 55 +++++++++++++++++++++---------------------------- nodes.py | 7 +++++-- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/comfy/sample.py b/comfy/sample.py index 981781b53..84eefcb7b 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -2,30 +2,25 @@ import torch import comfy.model_management -def prepare_noise(latent, seed): - """creates random noise given a LATENT and a seed""" - latent_image = latent["samples"] - batch_index = 0 - if "batch_index" in latent: - batch_index = latent["batch_index"] - +def prepare_noise(latent_image, seed, skip=0): + """ + creates random noise given a latent image and a seed. + optional arg skip can be used to skip and discard x number of noise generations for a given seed + """ generator = torch.manual_seed(seed) - for i in range(batch_index): + for _ in range(skip): noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") return noise -def create_mask(latent, noise): - """creates a mask for a given LATENT and noise""" - noise_mask = None +def prepare_mask(noise_mask, noise): + """ensures noise mask is of proper dimensions""" device = comfy.model_management.get_torch_device() - if "noise_mask" in latent: - noise_mask = latent['noise_mask'] - noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") - noise_mask = noise_mask.round() - noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) - noise_mask = torch.cat([noise_mask] * noise.shape[0]) - noise_mask = noise_mask.to(device) + noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") + noise_mask = noise_mask.round() + noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) + noise_mask = torch.cat([noise_mask] * noise.shape[0]) + noise_mask = noise_mask.to(device) return noise_mask def broadcast_cond(cond, noise): @@ -40,22 +35,20 @@ def broadcast_cond(cond, noise): copy += [[t] + p[1:]] return copy -def load_c_nets(positive, negative): - """loads control nets in positive and negative conditioning""" - def get_models(cond): - models = [] - for c in cond: - if 'control' in c[1]: - models += [c[1]['control']] - if 'gligen' in c[1]: - models += [c[1]['gligen'][1]] - return models - - return get_models(positive) + get_models(negative) +def get_models_from_cond(cond, model_type): + models = [] + for c in cond: + if model_type in c[1]: + models += [c[1][model_type]] + return models def load_additional_models(positive, negative): """loads additional models in positive and negative conditioning""" - models = load_c_nets(positive, negative) + models = [] + models += get_models_from_cond(positive, "control") + models += get_models_from_cond(negative, "control") + models += get_models_from_cond(positive, "gligen") + models += get_models_from_cond(negative, "gligen") comfy.model_management.load_controlnet_gpu(models) return models diff --git a/nodes.py b/nodes.py index b8c6d350f..f9bedc97e 100644 --- a/nodes.py +++ b/nodes.py @@ -747,9 +747,12 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, if disable_noise: noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") else: - noise = comfy.sample.prepare_noise(latent, seed) + skip = latent["batch_index"] if "batch_index" in latent else 0 + noise = comfy.sample.prepare_noise(latent_image, seed, skip) - noise_mask = comfy.sample.create_mask(latent, noise) + noise_mask = None + if "noise_mask" in latent: + noise_mask = comfy.sample.prepare_mask(latent["noise_mask"], noise) real_model = None comfy.model_management.load_model_gpu(model) From 0b07b2cc0f94fc2b8ebe656dfb3768c6f67866f1 Mon Sep 17 00:00:00 2001 From: BlenderNeko <126974546+BlenderNeko@users.noreply.github.com> Date: Mon, 24 Apr 2023 21:47:57 +0200 Subject: [PATCH 40/45] gligen tuple --- comfy/sample.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/comfy/sample.py b/comfy/sample.py index 84eefcb7b..09ab20cd2 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -44,11 +44,10 @@ def get_models_from_cond(cond, model_type): def load_additional_models(positive, negative): """loads additional models in positive and negative conditioning""" - models = [] - models += get_models_from_cond(positive, "control") - models += get_models_from_cond(negative, "control") - models += get_models_from_cond(positive, "gligen") - models += get_models_from_cond(negative, "gligen") + control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control") + gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen") + gligen = [x[1] for x in gligen] + models = control_nets + gligen comfy.model_management.load_controlnet_gpu(models) return models From 36acce58e71bbe1bf835c2ec380dc7ac0c5b4752 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 24 Apr 2023 18:13:18 -0400 Subject: [PATCH 41/45] Auto increase the size of the image upload widget when there's an image. --- web/scripts/widgets.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/scripts/widgets.js b/web/scripts/widgets.js index 2acc5f2c0..238ad59dd 100644 --- a/web/scripts/widgets.js +++ b/web/scripts/widgets.js @@ -270,6 +270,9 @@ export const ComfyWidgets = { app.graph.setDirtyCanvas(true); }; img.src = `/view?filename=${name}&type=input`; + if ((node.size[1] - node.imageOffset) < 100) { + node.size[1] = 250 + node.imageOffset; + } } // Add our own callback to the combo widget to render an image when it changes From 7983b3a975c26b93601c8b6fa9a0a333b35794bd Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 24 Apr 2023 22:45:35 -0400 Subject: [PATCH 42/45] This is cleaner this way. --- comfy/samplers.py | 59 ++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index 46bdb82a0..26597ebba 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -400,38 +400,6 @@ def encode_adm(noise_augmentor, conds, batch_size, device): return conds -def calculate_sigmas(model, steps, scheduler, sampler): - """ - Returns a tensor containing the sigmas corresponding to the given model, number of steps, scheduler type and sample technique - """ - if not (isinstance(model, CompVisVDenoiser) or isinstance(model, k_diffusion_external.CompVisDenoiser)): - model = CFGNoisePredictor(model) - if model.inner_model.parameterization == "v": - model = CompVisVDenoiser(model, quantize=True) - else: - model = k_diffusion_external.CompVisDenoiser(model, quantize=True) - - sigmas = None - - discard_penultimate_sigma = False - if sampler in ['dpm_2', 'dpm_2_ancestral']: - steps += 1 - discard_penultimate_sigma = True - - if scheduler == "karras": - sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=float(model.sigma_min), sigma_max=float(model.sigma_max)) - elif scheduler == "normal": - sigmas = model.get_sigmas(steps) - elif scheduler == "simple": - sigmas = simple_scheduler(model, steps) - elif scheduler == "ddim_uniform": - sigmas = ddim_scheduler(model, steps) - else: - print("error invalid scheduler", scheduler) - - if discard_penultimate_sigma: - sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) - return sigmas class KSampler: SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"] @@ -461,13 +429,36 @@ class KSampler: self.denoise = denoise self.model_options = model_options + def calculate_sigmas(self, steps): + sigmas = None + + discard_penultimate_sigma = False + if self.sampler in ['dpm_2', 'dpm_2_ancestral']: + steps += 1 + discard_penultimate_sigma = True + + if self.scheduler == "karras": + sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max) + elif self.scheduler == "normal": + sigmas = self.model_wrap.get_sigmas(steps) + elif self.scheduler == "simple": + sigmas = simple_scheduler(self.model_wrap, steps) + elif self.scheduler == "ddim_uniform": + sigmas = ddim_scheduler(self.model_wrap, steps) + else: + print("error invalid scheduler", self.scheduler) + + if discard_penultimate_sigma: + sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) + return sigmas + def set_steps(self, steps, denoise=None): self.steps = steps if denoise is None or denoise > 0.9999: - self.sigmas = calculate_sigmas(self.model_wrap, steps, self.scheduler, self.sampler).to(self.device) + self.sigmas = self.calculate_sigmas(steps).to(self.device) else: new_steps = int(steps/denoise) - sigmas = calculate_sigmas(self.model_wrap, new_steps, self.scheduler, self.sampler).to(self.device) + sigmas = self.calculate_sigmas(new_steps).to(self.device) self.sigmas = sigmas[-(steps + 1):] From c50208a703c6eba2363b08c4cb62e903a3012710 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 24 Apr 2023 23:25:51 -0400 Subject: [PATCH 43/45] Refactor more code to sample.py --- comfy/sample.py | 47 ++++++++++++++++++++++++++++++++++++----------- nodes.py | 28 ++++------------------------ 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/comfy/sample.py b/comfy/sample.py index 09ab20cd2..d6848f9d5 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -1,5 +1,6 @@ import torch import comfy.model_management +import comfy.samplers def prepare_noise(latent_image, seed, skip=0): @@ -13,24 +14,22 @@ def prepare_noise(latent_image, seed, skip=0): noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") return noise -def prepare_mask(noise_mask, noise): +def prepare_mask(noise_mask, shape, device): """ensures noise mask is of proper dimensions""" - device = comfy.model_management.get_torch_device() - noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") + noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(shape[2], shape[3]), mode="bilinear") noise_mask = noise_mask.round() - noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) - noise_mask = torch.cat([noise_mask] * noise.shape[0]) + noise_mask = torch.cat([noise_mask] * shape[1], dim=1) + noise_mask = torch.cat([noise_mask] * shape[0]) noise_mask = noise_mask.to(device) return noise_mask -def broadcast_cond(cond, noise): - """broadcasts conditioning to the noise batch size""" - device = comfy.model_management.get_torch_device() +def broadcast_cond(cond, batch, device): + """broadcasts conditioning to the batch size""" copy = [] for p in cond: t = p[0] - if t.shape[0] < noise.shape[0]: - t = torch.cat([t] * noise.shape[0]) + if t.shape[0] < batch: + t = torch.cat([t] * batch) t = t.to(device) copy += [[t] + p[1:]] return copy @@ -54,4 +53,30 @@ def load_additional_models(positive, negative): def cleanup_additional_models(models): """cleanup additional models that were loaded""" for m in models: - m.cleanup() \ No newline at end of file + m.cleanup() + +def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None): + device = comfy.model_management.get_torch_device() + + if noise_mask is not None: + noise_mask = prepare_mask(noise_mask, noise.shape, device) + + real_model = None + comfy.model_management.load_model_gpu(model) + real_model = model.model + + noise = noise.to(device) + latent_image = latent_image.to(device) + + positive_copy = broadcast_cond(positive, noise.shape[0], device) + negative_copy = broadcast_cond(negative, noise.shape[0], device) + + models = load_additional_models(positive, negative) + + sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) + + samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas) + samples = samples.cpu() + + cleanup_additional_models(models) + return samples diff --git a/nodes.py b/nodes.py index f787fcf8a..0083f6ef8 100644 --- a/nodes.py +++ b/nodes.py @@ -752,31 +752,11 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, noise_mask = None if "noise_mask" in latent: - noise_mask = comfy.sample.prepare_mask(latent["noise_mask"], noise) - - real_model = None - comfy.model_management.load_model_gpu(model) - real_model = model.model - - noise = noise.to(device) - latent_image = latent_image.to(device) - - positive_copy = comfy.sample.broadcast_cond(positive, noise) - negative_copy = comfy.sample.broadcast_cond(negative, noise) - - models = comfy.sample.load_additional_models(positive, negative) - - if sampler_name in comfy.samplers.KSampler.SAMPLERS: - sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) - else: - #other samplers - pass - - samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask) - samples = samples.cpu() - - comfy.sample.cleanup_additional_models(models) + noise_mask = latent["noise_mask"] + samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, + denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step, + force_full_denoise=force_full_denoise, noise_mask=noise_mask) out = latent.copy() out["samples"] = samples return (out, ) From aa57136dae83887e005ab6b0222dce4667b61bee Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 25 Apr 2023 01:12:40 -0400 Subject: [PATCH 44/45] Some fixes to the batch masks PR. --- comfy/sample.py | 7 ++++--- nodes.py | 10 +++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/comfy/sample.py b/comfy/sample.py index d6848f9d5..5e4d26142 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -1,7 +1,7 @@ import torch import comfy.model_management import comfy.samplers - +import math def prepare_noise(latent_image, seed, skip=0): """ @@ -16,10 +16,11 @@ def prepare_noise(latent_image, seed, skip=0): def prepare_mask(noise_mask, shape, device): """ensures noise mask is of proper dimensions""" - noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(shape[2], shape[3]), mode="bilinear") + noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear") noise_mask = noise_mask.round() noise_mask = torch.cat([noise_mask] * shape[1], dim=1) - noise_mask = torch.cat([noise_mask] * shape[0]) + if noise_mask.shape[0] < shape[0]: + noise_mask = noise_mask.repeat(math.ceil(shape[0] / noise_mask.shape[0]), 1, 1, 1)[:shape[0]] noise_mask = noise_mask.to(device) return noise_mask diff --git a/nodes.py b/nodes.py index b0b61d676..0a9513bed 100644 --- a/nodes.py +++ b/nodes.py @@ -172,16 +172,12 @@ class VAEEncodeForInpaint: def encode(self, vae, pixels, mask): x = (pixels.shape[1] // 64) * 64 y = (pixels.shape[2] // 64) * 64 - if len(mask.shape) < 3: - mask = mask.unsqueeze(0).unsqueeze(0) - elif len(mask.shape) < 4: - mask = mask.unsqueeze(1) - mask = torch.nn.functional.interpolate(mask, size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") + mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") pixels = pixels.clone() if pixels.shape[1] != x or pixels.shape[2] != y: pixels = pixels[:,:x,:y,:] - mask = mask[:,:x,:y,:] + mask = mask[:,:,:x,:y] #grow mask by a few pixels to keep things seamless in latent space kernel_tensor = torch.ones((1, 1, 6, 6)) @@ -193,7 +189,7 @@ class VAEEncodeForInpaint: pixels[:,:,:,i] += 0.5 t = vae.encode(pixels) - return ({"samples":t, "noise_mask": (mask_erosion[:,:x,:y,:].round())}, ) + return ({"samples":t, "noise_mask": (mask_erosion[:,:,:x,:y].round())}, ) class CheckpointLoader: @classmethod From 07194297fd41729f8b95352a710b9039ca2c99e8 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 25 Apr 2023 14:02:17 -0400 Subject: [PATCH 45/45] Python 3.7 support. --- comfy_extras/chainner_models/architecture/block.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/comfy_extras/chainner_models/architecture/block.py b/comfy_extras/chainner_models/architecture/block.py index 1abe1ed8f..214642cc4 100644 --- a/comfy_extras/chainner_models/architecture/block.py +++ b/comfy_extras/chainner_models/architecture/block.py @@ -4,7 +4,10 @@ from __future__ import annotations from collections import OrderedDict -from typing import Literal +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal import torch import torch.nn as nn