diff --git a/comfy/annotator.zip b/comfy/annotator.zip
new file mode 100644
index 000000000..b2046b74d
Binary files /dev/null and b/comfy/annotator.zip differ
diff --git a/nodes.py b/nodes.py
index ed30c0ca5..b4e8c6f9f 100644
--- a/nodes.py
+++ b/nodes.py
@@ -18,11 +18,11 @@
 import comfy.samplers
 import comfy.sd
 import comfy.utils
 from comfy.annotator import canny, hed, midas, mlsd, openpose, uniformer
+from comfy.annotator.util import HWC3
 import model_management
 import importlib
-
 supported_ckpt_extensions = ['.ckpt', '.pth']
 supported_pt_extensions = ['.ckpt', '.pt', '.bin', '.pth']
 
 try:
@@ -43,6 +43,17 @@ def recursive_search(directory):
 def filter_files_extensions(files, extensions):
     return sorted(list(filter(lambda a: os.path.splitext(a)[-1].lower() in extensions, files)))
 
+def img_np_to_tensor(img_np):
+    # HWC uint8 [0, 255] -> [1, H, W, C] float32 [0, 1] (ComfyUI's IMAGE layout)
+    return torch.from_numpy(img_np.astype(np.float32) / 255.0)[None,]
+
+def img_tensor_to_np(img_tensor):
+    # [1, H, W, C] float32 [0, 1] -> HWC uint8 [0, 255] for the annotators
+    img_tensor = img_tensor.clone()
+    img_tensor = img_tensor * 255.0
+    return img_tensor.squeeze(0).numpy().astype(np.uint8)
+#Thanks ChatGPT
+
 class CLIPTextEncode:
     @classmethod
     def INPUT_TYPES(s):
@@ -217,109 +228,6 @@ class VAELoader:
         vae = comfy.sd.VAE(ckpt_path=vae_path)
         return (vae,)
 
-class CannyPreprocessor:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE", ) ,
-                              "low_threshold": ("INT", {"default": 100, "min": 0, "max": 255, "step": 1}),
-                              "high_threshold": ("INT", {"default": 100, "min": 0, "max": 255, "step": 1}),
-                              "l2gradient": (["disable", "enable"], )
-                              }}
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "detect_edge"
-
-    CATEGORY = "preprocessor"
-
-    def detect_edge(self, image, low_threshold, high_threshold, l2gradient):
-        apply_canny = canny.CannyDetector()
-        image = apply_canny(image.numpy(), low_threshold, high_threshold, l2gradient == "enable")
-        image = torch.from_numpy(image)
-        return (image,)
-
-class HEDPreprocessor:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE",) }}
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "detect_edge"
-
-    CATEGORY = "preprocessor"
-
-    def detect_edge(self, image):
-        apply_hed = hed.HEDdetector()
-        image = torch.from_numpy(apply_hed(image.numpy()))
-        return (image,)
-
-class MIDASPreprocessor:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE", ) ,
-                              "a": ("FLOAT", {"default": np.pi * 2.0, "min": 0.0, "max": np.pi * 5.0, "step": 0.1}),
-                              "bg_threshold": ("FLOAT", {"default": 0.1, "min": 0, "max": 1, "step": 0.1})
-                              }}
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "estimate_depth"
-
-    CATEGORY = "preprocessor"
-
-    def estimate_depth(self, image, a, bg_threshold):
-        model_midas = midas.MidasDetector()
-        image = model_midas(image.numpy(), a, bg_threshold)
-        image = torch.from_numpy(image)
-        return (image,)
-
-class MLSDPreprocessor:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE",) ,
-                              #Idk what should be the max value here since idk much about ML
-                              "score_threshold": ("FLOAT", {"default": np.pi * 2.0, "min": 0.0, "max": np.pi * 2.0, "step": 0.1}),
-                              "dist_threshold": ("FLOAT", {"default": 0.1, "min": 0, "max": 1, "step": 0.1})
-                              }}
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "detect_edge"
-
-    CATEGORY = "preprocessor"
-
-    def detect_edge(self, image, score_threshold, dist_threshold):
-        model_mlsd = mlsd.MLSDdetector()
-        image = model_mlsd(image.numpy(), score_threshold, dist_threshold)
-        image = torch.from_numpy(image)
-        return (image,)
-
-class OpenPosePreprocessor:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE", ),
-                              "detect_hand": (["disable", "enable"],)
-                              }}
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "estimate_pose"
-
-    CATEGORY = "preprocessor"
-
-    def estimate_pose(self, image, detect_hand):
-        model_openpose = openpose.OpenposeDetector()
-        image = model_openpose(image.numpy(), detect_hand == "enable")
-        image = torch.from_numpy(image)
-        return (image,)
-
-class UniformerPreprocessor:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE", )
-                              }}
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "semantic_segmentate"
-
-    CATEGORY = "preprocessor"
-
-    def semantic_segmentate(self, image):
-        model_uniformer = uniformer.UniformerDetector()
-        image = torch.from_numpy(model_uniformer(image.numpy()))
-        return (image,)
-
-
 class ControlNetLoader:
     models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models")
     controlnet_dir = os.path.join(models_dir, "controlnet")
@@ -337,6 +245,110 @@ class ControlNetLoader:
         controlnet = comfy.sd.load_controlnet(controlnet_path)
         return (controlnet,)
 
+class CannyPreprocessor:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE", ) ,
+                              "low_threshold": ("INT", {"default": 100, "min": 0, "max": 255, "step": 1}),
+                              "high_threshold": ("INT", {"default": 100, "min": 0, "max": 255, "step": 1}),
+                              "l2gradient": (["disable", "enable"], )
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "detect_edge"
+
+    CATEGORY = "preprocessor"
+
+    def detect_edge(self, image, low_threshold, high_threshold, l2gradient):
+        apply_canny = canny.CannyDetector()
+        image = apply_canny(img_tensor_to_np(image), low_threshold, high_threshold, l2gradient == "enable")
+        image = img_np_to_tensor(HWC3(image))
+        return (image,)
+
+class HEDPreprocessor:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE",) }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "detect_edge"
+
+    CATEGORY = "preprocessor"
+
+    def detect_edge(self, image):
+        apply_hed = hed.HEDdetector()
+        image = apply_hed(img_tensor_to_np(image))
+        image = img_np_to_tensor(HWC3(image))
+        return (image,)
+
+class MIDASPreprocessor:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE", ) ,
+                              "a": ("FLOAT", {"default": np.pi * 2.0, "min": 0.0, "max": np.pi * 5.0, "step": 0.1}),
+                              "bg_threshold": ("FLOAT", {"default": 0.1, "min": 0, "max": 1, "step": 0.1})
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "estimate_depth"
+
+    CATEGORY = "preprocessor"
+
+    def estimate_depth(self, image, a, bg_threshold):
+        model_midas = midas.MidasDetector()
+        image, _ = model_midas(img_tensor_to_np(image), a, bg_threshold)
+        image = img_np_to_tensor(HWC3(image))
+        return (image,)
+
+class MLSDPreprocessor:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE",) ,
+                              # TODO: confirm a sensible maximum for score_threshold
+                              "score_threshold": ("FLOAT", {"default": np.pi * 2.0, "min": 0.0, "max": np.pi * 2.0, "step": 0.1}),
+                              "dist_threshold": ("FLOAT", {"default": 0.1, "min": 0, "max": 1, "step": 0.1})
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "detect_edge"
+
+    CATEGORY = "preprocessor"
+
+    def detect_edge(self, image, score_threshold, dist_threshold):
+        model_mlsd = mlsd.MLSDdetector()
+        image = model_mlsd(img_tensor_to_np(image), score_threshold, dist_threshold)
+        image = img_np_to_tensor(HWC3(image))
+        return (image,)
+
+class OpenPosePreprocessor:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE", ),
+                              "detect_hand": (["disable", "enable"],)
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "estimate_pose"
"estimate_pose" + + CATEGORY = "preprocessor" + + def estimate_pose(self, image, detect_hand): + model_openpose = openpose.OpenposeDetector() + image, _ = model_openpose(img_tensor_to_np(image), detect_hand == "enable") + image = img_np_to_tensor(HWC3(image)) + return (image,) + +class UniformerPreprocessor: + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE", ) + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "semantic_segmentate" + + CATEGORY = "preprocessor" + + def semantic_segmentate(self, image): + model_uniformer = uniformer.UniformerDetector() + image = model_uniformer(img_np_to_tensor(image)) + image = img_np_to_tensor(HWC3(image)) + return (image,) class ControlNetApply: @classmethod