Changes to the previous radiance commit. (#9851)
commit 80b7c9455b (parent c1297f4eb3)
@@ -306,8 +306,9 @@ class ChromaRadiance(Chroma):
 
         params = self.radiance_get_override_params(transformer_options.get("chroma_radiance_options", {}))
 
-        h_len = ((h + (self.patch_size // 2)) // self.patch_size)
-        w_len = ((w + (self.patch_size // 2)) // self.patch_size)
+        h_len = (img.shape[-2] // self.patch_size)
+        w_len = (img.shape[-1] // self.patch_size)
 
         img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
         img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
         img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
@@ -325,4 +326,4 @@ class ChromaRadiance(Chroma):
             transformer_options,
             attn_mask=kwargs.get("attention_mask", None),
         )
-        return self.forward_nerf(img, img_out, params)
+        return self.forward_nerf(img, img_out, params)[:, :, :h, :w]
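
Taken together, these two hunks compute the token grid from the padded img tensor itself and then crop the NeRF output back to the original h x w. A minimal sketch of that pad-then-crop relationship, assuming zero padding to a multiple of patch_size (the padding step itself is not shown in this diff):

import torch
import torch.nn.functional as F

def pad_and_grid(img: torch.Tensor, patch_size: int):
    # img: B, C, H, W. Pad so both spatial dims are multiples of patch_size (assumed scheme).
    h, w = img.shape[-2:]
    pad_h = (patch_size - h % patch_size) % patch_size
    pad_w = (patch_size - w % patch_size) % patch_size
    img = F.pad(img, (0, pad_w, 0, pad_h))
    h_len = img.shape[-2] // patch_size   # matches the new h_len computation
    w_len = img.shape[-1] // patch_size   # matches the new w_len computation
    return img, h_len, w_len

# Hypothetical usage: pad, run the model, then undo the padding exactly as the
# new return statement does with [:, :, :h, :w].
img, h_len, w_len = pad_and_grid(torch.randn(1, 3, 300, 500), patch_size=16)
out = torch.randn(1, 3, h_len * 16, w_len * 16)   # stand-in for the model output
out = out[:, :, :300, :500]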

comfy/pixel_space_convert.py (new file, 16 lines)
@@ -0,0 +1,16 @@
+import torch
+
+
+# "Fake" VAE that converts from IMAGE B, H, W, C and values on the scale of 0..1
+# to LATENT B, C, H, W and values on the scale of -1..1.
+class PixelspaceConversionVAE(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.pixel_space_vae = torch.nn.Parameter(torch.tensor(1.0))
+
+    def encode(self, pixels: torch.Tensor, *_args, **_kwargs) -> torch.Tensor:
+        return pixels
+
+    def decode(self, samples: torch.Tensor, *_args, **_kwargs) -> torch.Tensor:
+        return samples
+
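
The comment in this new file describes an IMAGE (B, H, W, C, values 0..1) to LATENT (B, C, H, W, values -1..1) conversion, while the module itself just passes tensors through; the actual scaling and channel move presumably happen in the surrounding VAE plumbing. As a point of reference, a minimal sketch of the conversion the comment describes (illustrative only, not code from this commit):

import torch

def image_to_latent(pixels: torch.Tensor) -> torch.Tensor:
    # B, H, W, C in 0..1  ->  B, C, H, W in -1..1
    return pixels.movedim(-1, 1) * 2.0 - 1.0

def latent_to_image(samples: torch.Tensor) -> torch.Tensor:
    # B, C, H, W in -1..1  ->  B, H, W, C in 0..1
    return (samples.movedim(1, -1) + 1.0) / 2.0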

comfy/sd.py (69 lines changed)
@@ -18,6 +18,7 @@ import comfy.ldm.wan.vae2_2
 import comfy.ldm.hunyuan3d.vae
 import comfy.ldm.ace.vae.music_dcae_pipeline
 import comfy.ldm.hunyuan_video.vae
+import comfy.pixel_space_convert
 import yaml
 import math
 import os
@@ -516,6 +517,15 @@ class VAE:
             self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
             self.disable_offload = True
             self.extra_1d_channel = 16
+        elif "pixel_space_vae" in sd:
+            self.first_stage_model = comfy.pixel_space_convert.PixelspaceConversionVAE()
+            self.memory_used_encode = lambda shape, dtype: (1 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
+            self.memory_used_decode = lambda shape, dtype: (1 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
+            self.downscale_ratio = 1
+            self.upscale_ratio = 1
+            self.latent_channels = 3
+            self.latent_dim = 2
+            self.output_channels = 3
         else:
             logging.warning("WARNING: No VAE weights detected, VAE not initalized.")
             self.first_stage_model = None
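
The memory_used_encode/decode estimates above charge roughly one element per pixel, so the pass-through VAE is treated as nearly free. A rough worked example (illustrative arithmetic only):

import torch

shape = (1, 3, 1024, 1024)                                            # B, C, H, W
element_bytes = torch.empty((), dtype=torch.float16).element_size()  # 2 bytes for fp16
estimate = (1 * shape[2] * shape[3]) * element_bytes                  # ~2 MiB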
@@ -785,65 +795,6 @@ class VAE:
         except:
             return None
 
-# "Fake" VAE that converts from IMAGE B, H, W, C and values on the scale of 0..1
-# to LATENT B, C, H, W and values on the scale of -1..1.
-class PixelspaceConversionVAE:
-    def __init__(self, size_increment: int=16):
-        self.intermediate_device = comfy.model_management.intermediate_device()
-        self.size_increment = size_increment
-
-    def vae_encode_crop_pixels(self, pixels: torch.Tensor) -> torch.Tensor:
-        if self.size_increment == 1:
-            return pixels
-        dims = pixels.shape[1:-1]
-        for d in range(len(dims)):
-            d_adj = (dims[d] // self.size_increment) * self.size_increment
-            if d_adj == d:
-                continue
-            d_offset = (dims[d] % self.size_increment) // 2
-            pixels = pixels.narrow(d + 1, d_offset, d_adj)
-        return pixels
-
-    def encode(self, pixels: torch.Tensor, *_args, **_kwargs) -> torch.Tensor:
-        if pixels.ndim == 3:
-            pixels = pixels.unsqueeze(0)
-        elif pixels.ndim != 4:
-            raise ValueError("Unexpected input image shape")
-        # Ensure the image has spatial dimensions that are multiples of 16.
-        pixels = self.vae_encode_crop_pixels(pixels)
-        h, w, c = pixels.shape[1:]
-        if h < self.size_increment or w < self.size_increment:
-            raise ValueError(f"Image inputs must have height/width of at least {self.size_increment} pixel(s).")
-        pixels = pixels[..., :3]
-        if c == 1:
-            pixels = pixels.expand(-1, -1, -1, 3)
-        elif c != 3:
-            raise ValueError("Unexpected number of channels in input image")
-        # Rescale to -1..1 and move the channel dimension to position 1.
-        latent = pixels.to(device=self.intermediate_device, dtype=torch.float32, copy=True)
-        latent = latent.clamp_(0, 1).movedim(-1, 1).contiguous()
-        latent -= 0.5
-        latent *= 2
-        return latent.clamp_(-1, 1)
-
-    def decode(self, samples: torch.Tensor, *_args, **_kwargs) -> torch.Tensor:
-        # Rescale to 0..1 and move the channel dimension to the end.
-        img = samples.to(device=self.intermediate_device, dtype=torch.float32, copy=True)
-        img = img.clamp_(-1, 1).movedim(1, -1).contiguous()
-        img += 1.0
-        img *= 0.5
-        return img.clamp_(0, 1)
-
-    encode_tiled = encode
-    decode_tiled = decode
-
-    @classmethod
-    def spacial_compression_decode(cls) -> int:
-        # This just exists so the tiled VAE nodes don't crash.
-        return 1
-
-    spacial_compression_encode = spacial_compression_decode
-    temporal_compression_decode = spacial_compression_decode
-
 
 class StyleModel:
     def __init__(self, model, device="cpu"):
@@ -1213,7 +1213,7 @@ class ChromaRadiance(Chroma):
     latent_format = comfy.latent_formats.ChromaRadiance
 
     # Pixel-space model, no spatial compression for model input.
-    memory_usage_factor = 0.0325
+    memory_usage_factor = 0.038
 
     def get_model(self, state_dict, prefix="", device=None):
        return model_base.ChromaRadiance(self, device=device)

nodes.py (7 lines changed)
@@ -730,7 +730,7 @@ class VAELoader:
             vaes.append("taesd3")
         if f1_taesd_dec and f1_taesd_enc:
             vaes.append("taef1")
-        vaes.append("chroma_radiance")
+        vaes.append("pixel_space")
         return vaes
 
     @staticmethod
@@ -773,8 +773,9 @@ class VAELoader:
 
     #TODO: scale factor?
     def load_vae(self, vae_name):
-        if vae_name == "chroma_radiance":
-            return (comfy.sd.PixelspaceConversionVAE(),)
+        if vae_name == "pixel_space":
+            sd = {}
+            sd["pixel_space_vae"] = torch.tensor(1.0)
         elif vae_name in ["taesd", "taesdxl", "taesd3", "taef1"]:
             sd = self.load_taesd(vae_name)
         else:
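
With this change, choosing "pixel_space" in the VAE loader no longer constructs the VAE object directly; it builds a one-key sentinel state dict that the new elif "pixel_space_vae" in sd branch in comfy/sd.py recognizes. A hypothetical end-to-end sketch, assuming load_vae still finishes by returning comfy.sd.VAE(sd=sd) (that part is outside this diff):

import torch
import comfy.sd

sd = {"pixel_space_vae": torch.tensor(1.0)}   # what the loader now builds for "pixel_space"
vae = comfy.sd.VAE(sd=sd)                     # routed to the PixelspaceConversionVAE branch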