From 40f8974443974410088e0665702a4f4e9e45d4b3 Mon Sep 17 00:00:00 2001 From: nygupta Date: Sun, 5 Apr 2026 11:03:13 +0530 Subject: [PATCH] initial setup --- comfyui_setup_guide.txt | 135 ++++++++++++++ workflows/anime_image.json | 371 +++++++++++++++++++++++++++++++++++++ 2 files changed, 506 insertions(+) create mode 100644 comfyui_setup_guide.txt create mode 100644 workflows/anime_image.json diff --git a/comfyui_setup_guide.txt b/comfyui_setup_guide.txt new file mode 100644 index 000000000..e01338fb1 --- /dev/null +++ b/comfyui_setup_guide.txt @@ -0,0 +1,135 @@ +COMFYUI LOCAL VIDEO GENERATION SETUP GUIDE (NILAY) + +--------------------------------------- +1. SYSTEM REQUIREMENTS +--------------------------------------- +GPU: NVIDIA RTX 4070 (8GB VRAM) +Drivers: Installed (nvidia-smi working) +Python: 3.10+ +OS: Windows + +--------------------------------------- +2. INSTALLATION STEPS +--------------------------------------- + +Step 1: Clone ComfyUI +git clone https://github.com/comfyanonymous/ComfyUI.git +cd ComfyUI + +Step 2: Create Virtual Environment +python -m venv comfy-env +comfy-env\Scripts\activate + +Step 3: Install Dependencies +pip install -r requirements.txt + +Step 4: Install CUDA-enabled PyTorch +pip uninstall torch torchvision torchaudio -y +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 + +Step 5: Verify GPU +python +import torch +print(torch.cuda.is_available()) +print(torch.cuda.get_device_name(0)) + +--------------------------------------- +3. RUN COMFYUI +--------------------------------------- +python main.py + +Open in browser: +http://127.0.0.1:8188 + +--------------------------------------- +4. MODELS SETUP +--------------------------------------- + +IMPORTANT: +Do NOT use SVD model for text prompts. + +Use: + +A. Text-to-Image Model (REQUIRED) +Download: +v1-5-pruned-emaonly.safetensors +Place in: +ComfyUI/models/checkpoints/ + +B. 
Video Model (OPTIONAL) +svd_xt.safetensors +(Used ONLY for image-to-video) + +--------------------------------------- +5. BASIC WORKFLOW (TEXT → IMAGE) +--------------------------------------- + +Nodes: +- Load Checkpoint (SD model) +- CLIP Text Encode (positive) +- CLIP Text Encode (negative) +- Empty Latent Image +- KSampler +- VAE Decode +- Save Image + +Connections: + +Checkpoint.CLIP → CLIP Encode (both +ve & -ve) +Checkpoint.MODEL → KSampler.model +Checkpoint.VAE → VAE Decode.vae + +Positive → KSampler.positive +Negative → KSampler.negative +Latent → KSampler.latent_image + +KSampler → VAE Decode +VAE Decode → Save + +--------------------------------------- +6. IMPORTANT SETTINGS +--------------------------------------- + +Resolution: 512 x 512 +Batch size (frames): 16 +Steps: 20 +CFG: 7.5 +Sampler: euler +Scheduler: normal +Denoise: 1.0 + +--------------------------------------- +7. COMMON ERRORS & FIXES +--------------------------------------- + +Error: Torch not compiled with CUDA +→ Install CUDA version of PyTorch + +Error: steps = NaN +→ Delete and re-add KSampler + +Error: clip input is invalid +→ Wrong model (SVD used instead of SD) + +Error: CUDA out of memory +→ Reduce resolution or batch size + +--------------------------------------- +8. VIDEO GENERATION PIPELINE (CORRECT) +--------------------------------------- + +Text → Image (SD model) +Image → Video (SVD / AnimateDiff) + +NOT: +Text → Video directly (will fail) + +--------------------------------------- +9. 
OUTPUT LOCATION +--------------------------------------- + +Generated files: +ComfyUI/output/ + +--------------------------------------- +END diff --git a/workflows/anime_image.json b/workflows/anime_image.json new file mode 100644 index 000000000..85691c9d6 --- /dev/null +++ b/workflows/anime_image.json @@ -0,0 +1,371 @@ +{ + "id": "24ec025d-a335-41f7-b48f-9dfde13adc33", + "revision": 0, + "last_node_id": 8, + "last_link_id": 0, + "nodes": [ + { + "id": 6, + "type": "KSampler", + "pos": [ + 800, + 0 + ], + "size": [ + 270, + 262 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": null + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": null + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": null + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": null + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 20, 
+ "fixed", + 20, + 7.5, + "euler", + "normal", + 1 + ] + }, + { + "id": 4, + "type": "CLIPTextEncode", + "pos": [ + 687.8332901000977, + -303.9999945958456 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": null + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": null + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "anime style, studio ghibli, highly detailed" + ] + }, + { + "id": 3, + "type": "CheckpointLoaderSimple", + "pos": [ + 15.611103905571852, + 242.7222294277615 + ], + "size": [ + 270, + 98 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": null + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": null + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "v1-5-pruned-emaonly.safetensors" + ] + }, + { + "id": 1, + "type": "LoadVideo", + "pos": [ + 105.77774895562072, + -162.44445164998368 + ], + "size": [ + 282.798828125, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "file", + "name": "file", + "type": "COMBO", + "widget": { + "name": "file" + }, + "link": null + }, + { + "localized_name": "choose file to upload", + "name": "upload", + "type": "IMAGEUPLOAD", + "widget": { + "name": "upload" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": 
"VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadVideo" + }, + "widgets_values": [ + null, + "image" + ] + }, + { + "id": 5, + "type": "CLIPTextEncode", + "pos": [ + 359.1666666666667, + 108.49997838338217 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": null + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": null + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "blurry, low quality, distorted" + ] + }, + { + "id": 7, + "type": "VAEDecode", + "pos": [ + 1295.507941635465, + -123.21179729824479 + ], + "size": [ + 140, + 46 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": null + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + } + ], + "links": [], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.798624898856785, + "offset": [ + 156.14948406834526, + 418.7798890693015 + ] + } + }, + "version": 0.4 +} \ No newline at end of file