From 40f8974443974410088e0665702a4f4e9e45d4b3 Mon Sep 17 00:00:00 2001 From: nygupta Date: Sun, 5 Apr 2026 11:03:13 +0530 Subject: [PATCH] initial setup --- comfyui_setup_guide.txt | 135 ++++++++++++++ workflows/anime_image.json | 371 +++++++++++++++++++++++++++++++++++++ 2 files changed, 506 insertions(+) create mode 100644 comfyui_setup_guide.txt create mode 100644 workflows/anime_image.json diff --git a/comfyui_setup_guide.txt b/comfyui_setup_guide.txt new file mode 100644 index 000000000..e01338fb1 --- /dev/null +++ b/comfyui_setup_guide.txt @@ -0,0 +1,135 @@ +COMFYUI LOCAL VIDEO GENERATION SETUP GUIDE (NILAY) + +--------------------------------------- +1. SYSTEM REQUIREMENTS +--------------------------------------- +GPU: NVIDIA RTX 4070 (8GB VRAM) +Drivers: Installed (nvidia-smi working) +Python: 3.10+ +OS: Windows + +--------------------------------------- +2. INSTALLATION STEPS +--------------------------------------- + +Step 1: Clone ComfyUI +git clone https://github.com/comfyanonymous/ComfyUI.git +cd ComfyUI + +Step 2: Create Virtual Environment +python -m venv comfy-env +comfy-env\Scripts\activate + +Step 3: Install Dependencies +pip install -r requirements.txt + +Step 4: Install CUDA-enabled PyTorch +pip uninstall torch torchvision torchaudio -y +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 + +Step 5: Verify GPU +python +import torch +print(torch.cuda.is_available()) +print(torch.cuda.get_device_name(0)) + +--------------------------------------- +3. RUN COMFYUI +--------------------------------------- +python main.py + +Open in browser: +http://127.0.0.1:8188 + +--------------------------------------- +4. MODELS SETUP +--------------------------------------- + +IMPORTANT: +Do NOT use SVD model for text prompts. + +Use: + +A. Text-to-Image Model (REQUIRED) +Download: +v1-5-pruned-emaonly.safetensors +Place in: +ComfyUI/models/checkpoints/ + +B. 
Video Model (OPTIONAL) +svd_xt.safetensors +(Used ONLY for image-to-video) + +--------------------------------------- +5. BASIC WORKFLOW (TEXT → IMAGE) +--------------------------------------- + +Nodes: +- Load Checkpoint (SD model) +- CLIP Text Encode (positive) +- CLIP Text Encode (negative) +- Empty Latent Image +- KSampler +- VAE Decode +- Save Image + +Connections: + +Checkpoint.CLIP → CLIP Encode (both +ve & -ve) +Checkpoint.MODEL → KSampler.model +Checkpoint.VAE → VAE Decode.vae + +Positive → KSampler.positive +Negative → KSampler.negative +Latent → KSampler.latent_image + +KSampler → VAE Decode +VAE Decode → Save + +--------------------------------------- +6. IMPORTANT SETTINGS +--------------------------------------- + +Resolution: 512 x 512 +Batch size (frames): 16 +Steps: 20 +CFG: 7.5 +Sampler: euler +Scheduler: normal +Denoise: 1.0 + +--------------------------------------- +7. COMMON ERRORS & FIXES +--------------------------------------- + +Error: Torch not compiled with CUDA +→ Install CUDA version of PyTorch + +Error: steps = NaN +→ Delete and re-add KSampler + +Error: clip input is invalid +→ Wrong model (SVD used instead of SD) + +Error: CUDA out of memory +→ Reduce resolution or batch size + +--------------------------------------- +8. VIDEO GENERATION PIPELINE (CORRECT) +--------------------------------------- + +Text → Image (SD model) +Image → Video (SVD / AnimateDiff) + +NOT: +Text → Video directly (will fail) + +--------------------------------------- +9. 
OUTPUT LOCATION +--------------------------------------- + +Generated files: +ComfyUI/output/ + +--------------------------------------- +END diff --git a/workflows/anime_image.json b/workflows/anime_image.json new file mode 100644 index 000000000..85691c9d6 --- /dev/null +++ b/workflows/anime_image.json @@ -0,0 +1,371 @@ +{ + "id": "24ec025d-a335-41f7-b48f-9dfde13adc33", + "revision": 0, + "last_node_id": 8, + "last_link_id": 0, + "nodes": [ + { + "id": 6, + "type": "KSampler", + "pos": [ + 800, + 0 + ], + "size": [ + 270, + 262 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": null + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": null + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": null + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": null + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 20, 
+ "fixed", + 20, + 7.5, + "euler", + "normal", + 1 + ] + }, + { + "id": 4, + "type": "CLIPTextEncode", + "pos": [ + 687.8332901000977, + -303.9999945958456 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": null + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": null + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "anime style, studio ghibli, highly detailed" + ] + }, + { + "id": 3, + "type": "CheckpointLoaderSimple", + "pos": [ + 15.611103905571852, + 242.7222294277615 + ], + "size": [ + 270, + 98 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": null + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": null + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "v1-5-pruned-emaonly.safetensors" + ] + }, + { + "id": 1, + "type": "LoadVideo", + "pos": [ + 105.77774895562072, + -162.44445164998368 + ], + "size": [ + 282.798828125, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "file", + "name": "file", + "type": "COMBO", + "widget": { + "name": "file" + }, + "link": null + }, + { + "localized_name": "choose file to upload", + "name": "upload", + "type": "IMAGEUPLOAD", + "widget": { + "name": "upload" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": 
"VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadVideo" + }, + "widgets_values": [ + null, + "image" + ] + }, + { + "id": 5, + "type": "CLIPTextEncode", + "pos": [ + 359.1666666666667, + 108.49997838338217 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": null + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": null + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "blurry, low quality, distorted" + ] + }, + { + "id": 7, + "type": "VAEDecode", + "pos": [ + 1295.507941635465, + -123.21179729824479 + ], + "size": [ + 140, + 46 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": null + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + } + ], + "links": [], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.798624898856785, + "offset": [ + 156.14948406834526, + 418.7798890693015 + ] + } + }, + "version": 0.4 +} \ No newline at end of file