From c8bfec86b3d1a548008c6a7a8cd81bdb447d9787 Mon Sep 17 00:00:00 2001
From: AIGCZero
Date: Mon, 15 Sep 2025 13:50:49 +0800
Subject: [PATCH] Add TextEncodeQwenImageEdit node with intelligent scaling

- Implements Qwen image editing functionality with CLIP text encoding
- Features intelligent scaling algorithm selection:
  - Uses the 'area' method for downscaling to preserve detail
  - Uses the 'lanczos' method for upscaling for better quality
- Supports optional VAE encoding for reference latents
- Maintains aspect ratio with the 'disabled' crop method
- Scales images to a target area of 1024x1024 total pixels, flooring each
  dimension to a multiple of 8

---
 comfy_extras/nodes_qwen.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/comfy_extras/nodes_qwen.py b/comfy_extras/nodes_qwen.py
index fff89556f..afba40069 100644
--- a/comfy_extras/nodes_qwen.py
+++ b/comfy_extras/nodes_qwen.py
@@ -1,4 +1,4 @@
-import node_helpers
+import node_helpers
 import comfy.utils
 import math
 
@@ -13,13 +13,15 @@ class TextEncodeQwenImageEdit:
             "optional": {"vae": ("VAE", ),
                          "image": ("IMAGE", ),}}
 
-    RETURN_TYPES = ("CONDITIONING",)
+    RETURN_TYPES = ("CONDITIONING", "IMAGE", "LATENT")
     FUNCTION = "encode"
 
     CATEGORY = "advanced/conditioning"
 
     def encode(self, clip, prompt, vae=None, image=None):
         ref_latent = None
+        output_image = None
+
         if image is None:
             images = []
         else:
@@ -27,12 +29,23 @@ class TextEncodeQwenImageEdit:
             total = int(1024 * 1024)
 
             scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
-            width = round(samples.shape[3] * scale_by)
-            height = round(samples.shape[2] * scale_by)
 
-            s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
+            width = math.floor(samples.shape[3] * scale_by / 8) * 8
+            height = math.floor(samples.shape[2] * scale_by / 8) * 8
+
+            original_width = samples.shape[3]
+            original_height = samples.shape[2]
+
+            if width < original_width or height < original_height:
+                upscale_method = "area"
+            else:
+                upscale_method = "lanczos"
+
+            s = comfy.utils.common_upscale(samples, width, height, upscale_method, "disabled")
             image = s.movedim(1, -1)
             images = [image[:, :, :, :3]]
+            output_image = image[:, :, :, :3]
+
             if vae is not None:
                 ref_latent = vae.encode(image[:, :, :, :3])
 
@@ -40,7 +53,10 @@ class TextEncodeQwenImageEdit:
         conditioning = clip.encode_from_tokens_scheduled(tokens)
         if ref_latent is not None:
             conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [ref_latent]}, append=True)
-        return (conditioning, )
+
+        latent_output = {"samples": ref_latent} if ref_latent is not None else None
+
+        return (conditioning, output_image, latent_output)
 
 
 NODE_CLASS_MAPPINGS = {
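
Note (illustrative sketch, not part of the patch): the scaling rule above can
be checked in isolation. This assumes only the Python standard library;
pick_scaled_dims is a hypothetical helper name, not a ComfyUI API:

    import math

    def pick_scaled_dims(w, h, total=1024 * 1024):
        # Uniform scale factor so the output covers ~total pixels while
        # preserving the input aspect ratio.
        scale_by = math.sqrt(total / (w * h))
        # Floor each dimension to a multiple of 8, as the patch does.
        new_w = math.floor(w * scale_by / 8) * 8
        new_h = math.floor(h * scale_by / 8) * 8
        # "area" preserves detail when shrinking; "lanczos" gives better
        # quality when enlarging -- the patch's selection rule.
        method = "area" if (new_w < w or new_h < h) else "lanczos"
        return new_w, new_h, method

    print(pick_scaled_dims(2048, 1536))  # (1176, 880, 'area')
    print(pick_scaled_dims(512, 512))    # (1024, 1024, 'lanczos')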