ComfyUI/blueprints/Image Captioning (gemini).json

{
  "revision": 0,
  "last_node_id": 231,
  "last_link_id": 0,
  "nodes": [
    {
      "id": 231,
      "type": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11",
      "pos": [
        23.13283014087665,
        1034.468391137315
      ],
      "size": [
        280,
        260
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "localized_name": "images",
          "name": "images",
          "type": "IMAGE",
          "link": null
        },
        {
          "name": "prompt",
          "type": "STRING",
          "widget": {
            "name": "prompt"
          },
          "link": null
        },
        {
          "name": "model",
          "type": "COMBO",
          "widget": {
            "name": "model"
          },
          "link": null
        }
      ],
      "outputs": [
        {
          "localized_name": "STRING",
          "name": "STRING",
          "type": "STRING",
          "links": []
        }
      ],
      "properties": {
        "proxyWidgets": [
          [
            "-1",
            "prompt"
          ],
          [
            "-1",
            "model"
          ],
          [
            "1",
            "seed"
          ]
        ],
        "cnr_id": "comfy-core",
        "ver": "0.13.0"
      },
      "widgets_values": [
        "Describe this image",
        "gemini-2.5-pro"
      ],
      "title": "Image Captioning(Gemini)"
    }
  ],
  "links": [],
  "version": 0.4,
  "definitions": {
    "subgraphs": [
      {
        "id": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11",
        "version": 1,
        "state": {
          "lastGroupId": 1,
          "lastNodeId": 16,
          "lastLinkId": 16,
          "lastRerouteId": 0
        },
        "revision": 0,
        "config": {},
        "name": "Image Captioning(Gemini)",
        "inputNode": {
          "id": -10,
          "bounding": [
            -6870,
            2530,
            120,
            100
          ]
        },
        "outputNode": {
          "id": -20,
          "bounding": [
            -6240,
            2530,
            120,
            60
          ]
        },
        "inputs": [
          {
            "id": "97cb8fa5-0514-4e05-b206-46fa6d7b5589",
            "name": "images",
            "type": "IMAGE",
            "linkIds": [
              1
            ],
            "localized_name": "images",
            "shape": 7,
            "pos": [
              -6770,
              2550
            ]
          },
          {
            "id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c",
            "name": "prompt",
            "type": "STRING",
            "linkIds": [
              15
            ],
            "pos": [
              -6770,
              2570
            ]
          },
          {
            "id": "b034e26a-d114-4604-aec2-32783e86aa6b",
            "name": "model",
            "type": "COMBO",
            "linkIds": [
              16
            ],
            "pos": [
              -6770,
              2590
            ]
          }
        ],
        "outputs": [
          {
            "id": "e12c6e80-5210-4328-a581-bc8924c53070",
            "name": "STRING",
            "type": "STRING",
            "linkIds": [
              6
            ],
            "localized_name": "STRING",
            "pos": [
              -6220,
              2550
            ]
          }
        ],
        "widgets": [],
        "nodes": [
          {
            "id": 1,
            "type": "GeminiNode",
            "pos": [
              -6690,
              2360
            ],
            "size": [
              390,
              430
            ],
            "flags": {},
            "order": 0,
            "mode": 0,
            "inputs": [
              {
                "localized_name": "images",
                "name": "images",
                "shape": 7,
                "type": "IMAGE",
                "link": 1
              },
              {
                "localized_name": "audio",
                "name": "audio",
                "shape": 7,
                "type": "AUDIO",
                "link": null
              },
              {
                "localized_name": "video",
                "name": "video",
                "shape": 7,
                "type": "VIDEO",
                "link": null
              },
              {
                "localized_name": "files",
                "name": "files",
                "shape": 7,
                "type": "GEMINI_INPUT_FILES",
                "link": null
              },
              {
                "localized_name": "prompt",
                "name": "prompt",
                "type": "STRING",
                "widget": {
                  "name": "prompt"
                },
                "link": 15
              },
              {
                "localized_name": "model",
                "name": "model",
                "type": "COMBO",
                "widget": {
                  "name": "model"
                },
                "link": 16
              },
              {
                "localized_name": "seed",
                "name": "seed",
                "type": "INT",
                "widget": {
                  "name": "seed"
                },
                "link": null
              },
              {
                "localized_name": "system_prompt",
                "name": "system_prompt",
                "shape": 7,
                "type": "STRING",
                "widget": {
                  "name": "system_prompt"
                },
                "link": null
              }
            ],
            "outputs": [
              {
                "localized_name": "STRING",
                "name": "STRING",
                "type": "STRING",
                "links": [
                  6
                ]
              }
            ],
            "properties": {
              "cnr_id": "comfy-core",
              "ver": "0.5.1",
              "Node name for S&R": "GeminiNode"
            },
            "widgets_values": [
              "Describe this image",
              "gemini-2.5-pro",
              511865409297955,
              "randomize",
              "- Role: AI Image Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze images and generate detailed descriptions which can be used as drawing prompts to create similar images. This is essential for tasks like content creation, design inspiration, and artistic exploration.\n- Profile: As an AI Image Analysis and Description Specialist, you possess extensive knowledge in computer vision, image processing, and natural language generation. You are adept at interpreting visual data and translating it into descriptive text that can guide the creation of new images.\n- Skills: Proficiency in image recognition, feature extraction, descriptive language generation, and understanding of artistic elements such as composition, color, and texture.\n- Goals: To analyze the provided image, generate a comprehensive and detailed description that captures the key visual elements, and ensure this description can effectively serve as a drawing prompt for creating similar images.\n- Constraints: The description must be clear, concise, and specific enough to guide the creation of a similar image. It should avoid ambiguity and focus on the most salient features of the image. The output should only contain the drawing prompt.\n- OutputFormat: A detailed text description of the image, highlighting key visual elements such as objects, colors, composition, and any unique features.\n- Workflow:\n  1. Analyze the image to identify key visual elements including objects, colors, and composition.\n  2. Generate a detailed description that captures the essence of the image, ensuring it is specific and actionable.\n  3. Refine the description to ensure clarity and conciseness, making it suitable for use as a drawing prompt."
            ],
            "color": "#432",
            "bgcolor": "#653"
          }
        ],
        "groups": [],
        "links": [
          {
            "id": 1,
            "origin_id": -10,
            "origin_slot": 0,
            "target_id": 1,
            "target_slot": 0,
            "type": "IMAGE"
          },
          {
            "id": 6,
            "origin_id": 1,
            "origin_slot": 0,
            "target_id": -20,
            "target_slot": 0,
            "type": "*"
          },
          {
            "id": 15,
            "origin_id": -10,
            "origin_slot": 1,
            "target_id": 1,
            "target_slot": 4,
            "type": "STRING"
          },
          {
            "id": 16,
            "origin_id": -10,
            "origin_slot": 2,
            "target_id": 1,
            "target_slot": 5,
            "type": "COMBO"
          }
        ],
        "extra": {
          "workflowRendererVersion": "LG"
        },
        "category": "Text generation/Image Captioning",
        "description": "Generates descriptive captions for images using Google's Gemini multimodal LLM."
      }
    ]
  }
}