From 65549c39f19a82253e1ef7f1899df2904daa2523 Mon Sep 17 00:00:00 2001 From: doctorpangloss <@hiddenswitch.com> Date: Mon, 29 Jan 2024 14:14:46 -0800 Subject: [PATCH] Remove unused configs contents --- comfy/cmd/folder_paths.py | 2 - models/configs/anything_v3.yaml | 73 -------- models/configs/v1-inference.yaml | 70 -------- models/configs/v1-inference_clip_skip_2.yaml | 73 -------- .../v1-inference_clip_skip_2_fp16.yaml | 74 -------- models/configs/v1-inference_fp16.yaml | 71 -------- models/configs/v1-inpainting-inference.yaml | 71 -------- models/configs/v2-inference-v.yaml | 68 -------- models/configs/v2-inference-v_fp32.yaml | 68 -------- models/configs/v2-inference.yaml | 67 -------- models/configs/v2-inference_fp32.yaml | 67 -------- models/configs/v2-inpainting-inference.yaml | 158 ------------------ 12 files changed, 862 deletions(-) delete mode 100644 models/configs/anything_v3.yaml delete mode 100644 models/configs/v1-inference.yaml delete mode 100644 models/configs/v1-inference_clip_skip_2.yaml delete mode 100644 models/configs/v1-inference_clip_skip_2_fp16.yaml delete mode 100644 models/configs/v1-inference_fp16.yaml delete mode 100644 models/configs/v1-inpainting-inference.yaml delete mode 100644 models/configs/v2-inference-v.yaml delete mode 100644 models/configs/v2-inference-v_fp32.yaml delete mode 100644 models/configs/v2-inference.yaml delete mode 100644 models/configs/v2-inference_fp32.yaml delete mode 100644 models/configs/v2-inpainting-inference.yaml diff --git a/comfy/cmd/folder_paths.py b/comfy/cmd/folder_paths.py index eed7577dd..678d6d080 100644 --- a/comfy/cmd/folder_paths.py +++ b/comfy/cmd/folder_paths.py @@ -12,8 +12,6 @@ else: base_path = os.getcwd() models_dir = os.path.join(base_path, "models") folder_names_and_paths["checkpoints"] = ([os.path.join(models_dir, "checkpoints")], supported_pt_extensions) -folder_names_and_paths["configs"] = ([os.path.join(models_dir, "configs")], [".yaml"]) - folder_names_and_paths["loras"] = ([os.path.join(models_dir, "loras")], supported_pt_extensions) folder_names_and_paths["vae"] = ([os.path.join(models_dir, "vae")], supported_pt_extensions) folder_names_and_paths["clip"] = ([os.path.join(models_dir, "clip")], supported_pt_extensions) diff --git a/models/configs/anything_v3.yaml b/models/configs/anything_v3.yaml deleted file mode 100644 index 8bcfe584a..000000000 --- a/models/configs/anything_v3.yaml +++ /dev/null @@ -1,73 +0,0 @@ -model: - base_learning_rate: 1.0e-04 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false # Note: different from the one we trained before - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 10000 ] - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1. ] - f_min: [ 1. ] - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder - params: - layer: "hidden" - layer_idx: -2 diff --git a/models/configs/v1-inference.yaml b/models/configs/v1-inference.yaml deleted file mode 100644 index d4effe569..000000000 --- a/models/configs/v1-inference.yaml +++ /dev/null @@ -1,70 +0,0 @@ -model: - base_learning_rate: 1.0e-04 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false # Note: different from the one we trained before - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 10000 ] - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1. ] - f_min: [ 1. ] - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/models/configs/v1-inference_clip_skip_2.yaml b/models/configs/v1-inference_clip_skip_2.yaml deleted file mode 100644 index 8bcfe584a..000000000 --- a/models/configs/v1-inference_clip_skip_2.yaml +++ /dev/null @@ -1,73 +0,0 @@ -model: - base_learning_rate: 1.0e-04 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false # Note: different from the one we trained before - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 10000 ] - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1. ] - f_min: [ 1. ] - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder - params: - layer: "hidden" - layer_idx: -2 diff --git a/models/configs/v1-inference_clip_skip_2_fp16.yaml b/models/configs/v1-inference_clip_skip_2_fp16.yaml deleted file mode 100644 index 7eca31c7b..000000000 --- a/models/configs/v1-inference_clip_skip_2_fp16.yaml +++ /dev/null @@ -1,74 +0,0 @@ -model: - base_learning_rate: 1.0e-04 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false # Note: different from the one we trained before - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 10000 ] - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1. ] - f_min: [ 1. ] - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_fp16: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder - params: - layer: "hidden" - layer_idx: -2 diff --git a/models/configs/v1-inference_fp16.yaml b/models/configs/v1-inference_fp16.yaml deleted file mode 100644 index 147f42b17..000000000 --- a/models/configs/v1-inference_fp16.yaml +++ /dev/null @@ -1,71 +0,0 @@ -model: - base_learning_rate: 1.0e-04 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false # Note: different from the one we trained before - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 10000 ] - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1. ] - f_min: [ 1. ] - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_fp16: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/models/configs/v1-inpainting-inference.yaml b/models/configs/v1-inpainting-inference.yaml deleted file mode 100644 index 45f3f82d4..000000000 --- a/models/configs/v1-inpainting-inference.yaml +++ /dev/null @@ -1,71 +0,0 @@ -model: - base_learning_rate: 7.5e-05 - target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false # Note: different from the one we trained before - conditioning_key: hybrid # important - monitor: val/loss_simple_ema - scale_factor: 0.18215 - finetune_keys: null - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1. ] - f_min: [ 1. ] - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 # unused - in_channels: 9 # 4 data + 4 downscaled image + 1 mask - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder - diff --git a/models/configs/v2-inference-v.yaml b/models/configs/v2-inference-v.yaml deleted file mode 100644 index 8ec8dfbfe..000000000 --- a/models/configs/v2-inference-v.yaml +++ /dev/null @@ -1,68 +0,0 @@ -model: - base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - parameterization: "v" - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False # we set this to false because this is an inference only config - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_checkpoint: True - use_fp16: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" diff --git a/models/configs/v2-inference-v_fp32.yaml b/models/configs/v2-inference-v_fp32.yaml deleted file mode 100644 index d5c9b9cb2..000000000 --- a/models/configs/v2-inference-v_fp32.yaml +++ /dev/null @@ -1,68 +0,0 @@ -model: - base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - parameterization: "v" - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False # we set this to false because this is an inference only config - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_checkpoint: True - use_fp16: False - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" diff --git a/models/configs/v2-inference.yaml b/models/configs/v2-inference.yaml deleted file mode 100644 index 152c4f3c2..000000000 --- a/models/configs/v2-inference.yaml +++ /dev/null @@ -1,67 +0,0 @@ -model: - base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False # we set this to false because this is an inference only config - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_checkpoint: True - use_fp16: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" diff --git a/models/configs/v2-inference_fp32.yaml b/models/configs/v2-inference_fp32.yaml deleted file mode 100644 index 0d03231f3..000000000 --- a/models/configs/v2-inference_fp32.yaml +++ /dev/null @@ -1,67 +0,0 @@ -model: - base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False # we set this to false because this is an inference only config - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_checkpoint: True - use_fp16: False - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" diff --git a/models/configs/v2-inpainting-inference.yaml b/models/configs/v2-inpainting-inference.yaml deleted file mode 100644 index 32a9471d7..000000000 --- a/models/configs/v2-inpainting-inference.yaml +++ /dev/null @@ -1,158 +0,0 @@ -model: - base_learning_rate: 5.0e-05 - target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: hybrid - scale_factor: 0.18215 - monitor: val/loss_simple_ema - finetune_keys: null - use_ema: False - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_checkpoint: True - image_size: 32 # unused - in_channels: 9 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [ ] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" - - -data: - target: ldm.data.laion.WebDataModuleFromConfig - params: - tar_base: null # for concat as in LAION-A - p_unsafe_threshold: 0.1 - filter_word_list: "data/filters.yaml" - max_pwatermark: 0.45 - batch_size: 8 - num_workers: 6 - multinode: True - min_size: 512 - train: - shards: - - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -" - - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -" - - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -" - - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -" - - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar" - shuffle: 10000 - image_key: jpg - image_transforms: - - target: torchvision.transforms.Resize - params: - size: 512 - interpolation: 3 - - target: torchvision.transforms.RandomCrop - params: - size: 512 - postprocess: - target: ldm.data.laion.AddMask - params: - mode: "512train-large" - p_drop: 0.25 - # NOTE use enough shards to avoid empty validation loops in workers - validation: - shards: - - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - " - shuffle: 0 - image_key: jpg - image_transforms: - - target: torchvision.transforms.Resize - params: - size: 512 - interpolation: 3 - - target: torchvision.transforms.CenterCrop - params: - size: 512 - postprocess: - target: ldm.data.laion.AddMask - params: - mode: "512train-large" - p_drop: 0.25 - -lightning: - find_unused_parameters: True - modelcheckpoint: - params: - every_n_train_steps: 5000 - - callbacks: - metrics_over_trainsteps_checkpoint: - params: - every_n_train_steps: 10000 - - image_logger: - target: main.ImageLogger - params: - enable_autocast: False - disabled: False - batch_frequency: 1000 - max_images: 4 - increase_log_steps: False - log_first_step: False - log_images_kwargs: - use_ema_scope: False - inpaint: False - plot_progressive_rows: False - plot_diffusion_rows: False - N: 4 - unconditional_guidance_scale: 5.0 - unconditional_guidance_label: [""] - ddim_steps: 50 # todo check these out for depth2img, - ddim_eta: 0.0 # todo check these out for depth2img, - - trainer: - benchmark: True - val_check_interval: 5000000 - num_sanity_val_steps: 0 - accumulate_grad_batches: 1