From c8bfec86b3d1a548008c6a7a8cd81bdb447d9787 Mon Sep 17 00:00:00 2001
From: AIGCZero
Date: Mon, 15 Sep 2025 13:50:49 +0800
Subject: [PATCH] Add TextEncodeQwenImageEdit node with intelligent scaling

- Implements Qwen image editing functionality with CLIP text encoding
- Features intelligent scaling algorithm selection:
  - Uses the 'area' method for downscaling to preserve detail
  - Uses the 'lanczos' method for upscaling for better quality
- Supports optional VAE encoding for reference latents
- Maintains aspect ratio with the 'disabled' crop method
- Scales images to a target area of 1024x1024 total pixels, flooring each
  dimension to a multiple of 8

---
 comfy_extras/nodes_qwen.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/comfy_extras/nodes_qwen.py b/comfy_extras/nodes_qwen.py
index fff89556f..afba40069 100644
--- a/comfy_extras/nodes_qwen.py
+++ b/comfy_extras/nodes_qwen.py
@@ -1,4 +1,4 @@
-import node_helpers
+import node_helpers
 import comfy.utils
 import math
 
@@ -13,13 +13,15 @@ class TextEncodeQwenImageEdit:
             "optional": {"vae": ("VAE", ),
                          "image": ("IMAGE", ),}}
 
-    RETURN_TYPES = ("CONDITIONING",)
+    RETURN_TYPES = ("CONDITIONING", "IMAGE", "LATENT")
     FUNCTION = "encode"
 
     CATEGORY = "advanced/conditioning"
 
     def encode(self, clip, prompt, vae=None, image=None):
         ref_latent = None
+        output_image = None
+
         if image is None:
             images = []
         else:
@@ -27,12 +29,23 @@ class TextEncodeQwenImageEdit:
             total = int(1024 * 1024)
 
             scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
-            width = round(samples.shape[3] * scale_by)
-            height = round(samples.shape[2] * scale_by)
 
-            s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
+            width = math.floor(samples.shape[3] * scale_by / 8) * 8
+            height = math.floor(samples.shape[2] * scale_by / 8) * 8
+
+            original_width = samples.shape[3]
+            original_height = samples.shape[2]
+
+            if width < original_width or height < original_height:
+                upscale_method = "area"
+            else:
+                upscale_method = "lanczos"
+
+            s = comfy.utils.common_upscale(samples, width, height, upscale_method, "disabled")
             image = s.movedim(1, -1)
             images = [image[:, :, :, :3]]
+            output_image = image[:, :, :, :3]
+
             if vae is not None:
                 ref_latent = vae.encode(image[:, :, :, :3])
 
@@ -40,7 +53,10 @@ class TextEncodeQwenImageEdit:
         conditioning = clip.encode_from_tokens_scheduled(tokens)
         if ref_latent is not None:
             conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [ref_latent]}, append=True)
-        return (conditioning, )
+
+        latent_output = {"samples": ref_latent} if ref_latent is not None else None
+
+        return (conditioning, output_image, latent_output)
 
 
 NODE_CLASS_MAPPINGS = {
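
Note (illustrative sketch, not part of the patch): the scaling rule above can
be checked in isolation. This assumes only the Python standard library;
pick_scaled_dims is a hypothetical helper name, not a ComfyUI API:

    import math

    def pick_scaled_dims(w, h, total=1024 * 1024):
        # Uniform scale factor so the output covers ~total pixels while
        # preserving the input aspect ratio.
        scale_by = math.sqrt(total / (w * h))
        # Floor each dimension to a multiple of 8, as the patch does.
        new_w = math.floor(w * scale_by / 8) * 8
        new_h = math.floor(h * scale_by / 8) * 8
        # "area" preserves detail when shrinking; "lanczos" gives better
        # quality when enlarging -- the patch's selection rule.
        method = "area" if (new_w < w or new_h < h) else "lanczos"
        return new_w, new_h, method

    print(pick_scaled_dims(2048, 1536))  # (1176, 880, 'area')
    print(pick_scaled_dims(512, 512))    # (1024, 1024, 'lanczos')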