import os
import cv2
import numpy as np
import torch

from annotator.util import HWC3
from typing import Callable, Tuple, Union, List

from modules.safe import Extra
from modules import devices
from scripts.logging import logger


def torch_handler(module: str, name: str):
    """ Allow all torch access. Bypass A1111 safety whitelist. """
    if module == 'torch':
        return getattr(torch, name)
    if module == 'torch._tensor':
        # depth_anything dep.
        return getattr(torch._tensor, name)
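

# This handler is passed to `modules.safe.Extra` below (see `depth_anything`
# and `HandRefinerModel`), temporarily extending A1111's restricted unpickler
# so checkpoints that reference torch globals can be loaded:
#   with Extra(torch_handler):
#       model = load_some_checkpoint()  # hypothetical load call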


def pad64(x):
    return int(np.ceil(float(x) / 64.0) * 64 - x)
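

# e.g. pad64(500) == 12: the next multiple of 64 at or above 500 is 512.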


def safer_memory(x):
    # Fix many MAC/AMD problems
    return np.ascontiguousarray(x.copy()).copy()


def resize_image_with_pad(input_image, resolution, skip_hwc3=False):
    if skip_hwc3:
        img = input_image
    else:
        img = HWC3(input_image)
    H_raw, W_raw, _ = img.shape
    k = float(resolution) / float(min(H_raw, W_raw))
    interpolation = cv2.INTER_CUBIC if k > 1 else cv2.INTER_AREA
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))
    img = cv2.resize(img, (W_target, H_target), interpolation=interpolation)
    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge')

    def remove_pad(x):
        return safer_memory(x[:H_target, :W_target])

    return safer_memory(img_padded), remove_pad
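

# Usage sketch: the detectors below run on the padded image, then crop back
# with the returned closure:
#   padded, remove_pad = resize_image_with_pad(img, 512)
#   detectmap = some_detector(padded)  # hypothetical detector
#   detectmap = remove_pad(detectmap)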


model_canny = None


def canny(img, res=512, thr_a=100, thr_b=200, **kwargs):
    l, h = thr_a, thr_b
    img, remove_pad = resize_image_with_pad(img, res)
    global model_canny
    if model_canny is None:
        from annotator.canny import apply_canny
        model_canny = apply_canny
    result = model_canny(img, l, h)
    return remove_pad(result), True
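

# All preprocessors in this module share the same call contract:
#   result, is_image = canny(img, res=512, thr_a=100, thr_b=200)
# `is_image` is False only for feature extractors (e.g. `clip`,
# `InsightFaceModel.run_model`) whose output is an embedding, not a detectmap.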


def scribble_thr(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    result = np.zeros_like(img, dtype=np.uint8)
    result[np.min(img, axis=2) < 127] = 255
    return remove_pad(result), True


def scribble_xdog(img, res=512, thr_a=32, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    g1 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 0.5)
    g2 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 5.0)
    dog = (255 - np.min(g2 - g1, axis=2)).clip(0, 255).astype(np.uint8)
    result = np.zeros_like(img, dtype=np.uint8)
    result[2 * (255 - dog) > thr_a] = 255
    return remove_pad(result), True
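

# With the default thr_a=32, a pixel turns white when the coarse blur exceeds
# the fine blur by more than 16 intensity levels on the darkest channel,
# i.e. when 2 * (255 - dog) > 32.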


def tile_resample(img, res=512, thr_a=1.0, **kwargs):
    img = HWC3(img)
    if thr_a < 1.1:
        return img, True
    H, W, C = img.shape
    H = int(float(H) / float(thr_a))
    W = int(float(W) / float(thr_a))
    img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
    return img, True
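

# Example: thr_a=2.0 halves each side, so a 1024x768 input becomes 512x384;
# rates below 1.1 are treated as "no resampling" and return the input as-is.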


def threshold(img, res=512, thr_a=127, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    result = np.zeros_like(img, dtype=np.uint8)
    result[np.min(img, axis=2) > thr_a] = 255
    return remove_pad(result), True


def identity(img, **kwargs):
    return img, True


def invert(img, res=512, **kwargs):
    return 255 - HWC3(img), True


model_hed = None


def hed(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_hed
    if model_hed is None:
        from annotator.hed import apply_hed
        model_hed = apply_hed
    result = model_hed(img)
    return remove_pad(result), True


def hed_safe(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_hed
    if model_hed is None:
        from annotator.hed import apply_hed
        model_hed = apply_hed
    result = model_hed(img, is_safe=True)
    return remove_pad(result), True


def unload_hed():
    global model_hed
    if model_hed is not None:
        from annotator.hed import unload_hed_model
        unload_hed_model()


def scribble_hed(img, res=512, **kwargs):
    result, _ = hed(img, res)
    from annotator.util import nms
    result = nms(result, 127, 3.0)
    result = cv2.GaussianBlur(result, (0, 0), 3.0)
    result[result > 4] = 255
    result[result < 255] = 0
    return result, True


model_mediapipe_face = None


def mediapipe_face(img, res=512, thr_a: int = 10, thr_b: float = 0.5, **kwargs):
    max_faces = int(thr_a)
    min_confidence = thr_b
    img, remove_pad = resize_image_with_pad(img, res)
    global model_mediapipe_face
    if model_mediapipe_face is None:
        from annotator.mediapipe_face import apply_mediapipe_face
        model_mediapipe_face = apply_mediapipe_face
    result = model_mediapipe_face(img, max_faces=max_faces, min_confidence=min_confidence)
    return remove_pad(result), True


model_mlsd = None


def mlsd(img, res=512, thr_a=0.1, thr_b=0.1, **kwargs):
    thr_v, thr_d = thr_a, thr_b
    img, remove_pad = resize_image_with_pad(img, res)
    global model_mlsd
    if model_mlsd is None:
        from annotator.mlsd import apply_mlsd
        model_mlsd = apply_mlsd
    result = model_mlsd(img, thr_v, thr_d)
    return remove_pad(result), True


def unload_mlsd():
    global model_mlsd
    if model_mlsd is not None:
        from annotator.mlsd import unload_mlsd_model
        unload_mlsd_model()


model_depth_anything = None


def depth_anything(img, res: int = 512, colored: bool = True, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_depth_anything
    if model_depth_anything is None:
        with Extra(torch_handler):
            from annotator.depth_anything import DepthAnythingDetector
            device = devices.get_device_for("controlnet")
            model_depth_anything = DepthAnythingDetector(device)
    return remove_pad(model_depth_anything(img, colored=colored)), True


def unload_depth_anything():
    if model_depth_anything is not None:
        model_depth_anything.unload_model()


model_midas = None


def midas(img, res=512, a=np.pi * 2.0, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_midas
    if model_midas is None:
        from annotator.midas import apply_midas
        model_midas = apply_midas
    result, _ = model_midas(img, a)
    return remove_pad(result), True


def midas_normal(img, res=512, a=np.pi * 2.0, thr_a=0.4, **kwargs):  # bg_th -> thr_a
    bg_th = thr_a
    img, remove_pad = resize_image_with_pad(img, res)
    global model_midas
    if model_midas is None:
        from annotator.midas import apply_midas
        model_midas = apply_midas
    _, result = model_midas(img, a, bg_th)
    return remove_pad(result), True


def unload_midas():
    global model_midas
    if model_midas is not None:
        from annotator.midas import unload_midas_model
        unload_midas_model()


model_leres = None


def leres(img, res=512, a=np.pi * 2.0, thr_a=0, thr_b=0, boost=False, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_leres
    if model_leres is None:
        from annotator.leres import apply_leres
        model_leres = apply_leres
    result = model_leres(img, thr_a, thr_b, boost=boost)
    return remove_pad(result), True


def unload_leres():
    global model_leres
    if model_leres is not None:
        from annotator.leres import unload_leres_model
        unload_leres_model()


class OpenposeModel(object):
    def __init__(self) -> None:
        self.model_openpose = None

    def run_model(
        self,
        img: np.ndarray,
        include_body: bool,
        include_hand: bool,
        include_face: bool,
        use_dw_pose: bool = False,
        use_animal_pose: bool = False,
        json_pose_callback: Callable[[str], None] = None,
        res: int = 512,
        **kwargs  # Ignore rest of kwargs
    ) -> Tuple[np.ndarray, bool]:
        """Run the openpose model. Returns a tuple of
        - result image
        - is_image flag

        The JSON format pose string is passed to `json_pose_callback`.
        """
        if json_pose_callback is None:
            json_pose_callback = lambda x: None

        img, remove_pad = resize_image_with_pad(img, res)

        if self.model_openpose is None:
            from annotator.openpose import OpenposeDetector
            self.model_openpose = OpenposeDetector()

        return remove_pad(self.model_openpose(
            img,
            include_body=include_body,
            include_hand=include_hand,
            include_face=include_face,
            use_dw_pose=use_dw_pose,
            use_animal_pose=use_animal_pose,
            json_pose_callback=json_pose_callback
        )), True

    def unload(self):
        if self.model_openpose is not None:
            self.model_openpose.unload_model()


g_openpose_model = OpenposeModel()
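

# Usage sketch: the shared instance is called with explicit include flags,
# e.g. for body-only pose:
#   pose_map, is_image = g_openpose_model.run_model(
#       img, include_body=True, include_hand=False, include_face=False, res=512)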


model_uniformer = None


def uniformer(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_uniformer
    if model_uniformer is None:
        from annotator.uniformer import apply_uniformer
        model_uniformer = apply_uniformer
    result = model_uniformer(img)
    return remove_pad(result), True


def unload_uniformer():
    global model_uniformer
    if model_uniformer is not None:
        from annotator.uniformer import unload_uniformer_model
        unload_uniformer_model()


model_pidinet = None


def pidinet(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_pidinet
    if model_pidinet is None:
        from annotator.pidinet import apply_pidinet
        model_pidinet = apply_pidinet
    result = model_pidinet(img)
    return remove_pad(result), True


def pidinet_ts(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_pidinet
    if model_pidinet is None:
        from annotator.pidinet import apply_pidinet
        model_pidinet = apply_pidinet
    result = model_pidinet(img, apply_fliter=True)  # sic: keyword is spelled 'apply_fliter' in annotator.pidinet
    return remove_pad(result), True


def pidinet_safe(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_pidinet
    if model_pidinet is None:
        from annotator.pidinet import apply_pidinet
        model_pidinet = apply_pidinet
    result = model_pidinet(img, is_safe=True)
    return remove_pad(result), True


def scribble_pidinet(img, res=512, **kwargs):
    result, _ = pidinet(img, res)
    from annotator.util import nms
    result = nms(result, 127, 3.0)
    result = cv2.GaussianBlur(result, (0, 0), 3.0)
    result[result > 4] = 255
    result[result < 255] = 0
    return result, True


def unload_pidinet():
    global model_pidinet
    if model_pidinet is not None:
        from annotator.pidinet import unload_pid_model
        unload_pid_model()


clip_encoder = {
    'clip_g': None,
    'clip_h': None,
    'clip_vitl': None,
}


def clip(img, res=512, config='clip_vitl', low_vram=False, **kwargs):
    img = HWC3(img)
    global clip_encoder
    if clip_encoder[config] is None:
        from annotator.clipvision import ClipVisionDetector
        if low_vram:
            logger.info("Loading CLIP model on CPU.")
        clip_encoder[config] = ClipVisionDetector(config, low_vram)
    result = clip_encoder[config](img)
    return result, False


def unload_clip(config='clip_vitl'):
    global clip_encoder
    if clip_encoder[config] is not None:
        clip_encoder[config].unload_model()
        clip_encoder[config] = None


model_color = None


def color(img, res=512, **kwargs):
    img = HWC3(img)
    global model_color
    if model_color is None:
        from annotator.color import apply_color
        model_color = apply_color
    result = model_color(img, res=res)
    return result, True


def lineart_standard(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    x = img.astype(np.float32)
    g = cv2.GaussianBlur(x, (0, 0), 6.0)
    intensity = np.min(g - x, axis=2).clip(0, 255)
    intensity /= max(16, np.median(intensity[intensity > 8]))
    intensity *= 127
    result = intensity.clip(0, 255).astype(np.uint8)
    return remove_pad(result), True
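

# The division above normalizes edge strength so the median response of
# clearly-detected edges (intensity > 8) maps to roughly mid gray (127),
# with a floor of 16 on the divisor so near-flat images do not blow up.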


model_lineart = None


def lineart(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_lineart
    if model_lineart is None:
        from annotator.lineart import LineartDetector
        model_lineart = LineartDetector(LineartDetector.model_default)

    # applied auto inversion
    result = 255 - model_lineart(img)
    return remove_pad(result), True


def unload_lineart():
    global model_lineart
    if model_lineart is not None:
        model_lineart.unload_model()


model_lineart_coarse = None


def lineart_coarse(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_lineart_coarse
    if model_lineart_coarse is None:
        from annotator.lineart import LineartDetector
        model_lineart_coarse = LineartDetector(LineartDetector.model_coarse)

    # applied auto inversion
    result = 255 - model_lineart_coarse(img)
    return remove_pad(result), True


def unload_lineart_coarse():
    global model_lineart_coarse
    if model_lineart_coarse is not None:
        model_lineart_coarse.unload_model()


model_lineart_anime = None


def lineart_anime(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_lineart_anime
    if model_lineart_anime is None:
        from annotator.lineart_anime import LineartAnimeDetector
        model_lineart_anime = LineartAnimeDetector()

    # applied auto inversion
    result = 255 - model_lineart_anime(img)
    return remove_pad(result), True


def unload_lineart_anime():
    global model_lineart_anime
    if model_lineart_anime is not None:
        model_lineart_anime.unload_model()


model_manga_line = None


def lineart_anime_denoise(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_manga_line
    if model_manga_line is None:
        from annotator.manga_line import MangaLineExtration  # sic: class name as spelled in annotator.manga_line
        model_manga_line = MangaLineExtration()

    # applied auto inversion
    result = model_manga_line(img)
    return remove_pad(result), True


def unload_lineart_anime_denoise():
    global model_manga_line
    if model_manga_line is not None:
        model_manga_line.unload_model()


model_lama = None


def lama_inpaint(img, res=512, **kwargs):
    H, W, C = img.shape
    raw_color = img[:, :, 0:3].copy()
    raw_mask = img[:, :, 3:4].copy()

    res = 256  # Always use 256 since lama is trained on 256

    img_res, remove_pad = resize_image_with_pad(img, res, skip_hwc3=True)

    global model_lama
    if model_lama is None:
        from annotator.lama import LamaInpainting
        model_lama = LamaInpainting()

    prd_color = model_lama(img_res)
    prd_color = remove_pad(prd_color)
    prd_color = cv2.resize(prd_color, (W, H))

    alpha = raw_mask.astype(np.float32) / 255.0
    fin_color = prd_color.astype(np.float32) * alpha + raw_color.astype(np.float32) * (1 - alpha)
    fin_color = fin_color.clip(0, 255).astype(np.uint8)

    result = np.concatenate([fin_color, raw_mask], axis=2)

    return result, True
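

# Note: unlike most preprocessors here, `lama_inpaint` expects an RGBA input
# whose fourth channel is the inpaint mask; masked pixels (alpha == 1) take
# the LaMa prediction, unmasked pixels keep the original color, and the mask
# is re-attached as the fourth channel of the result.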


def unload_lama_inpaint():
    global model_lama
    if model_lama is not None:
        model_lama.unload_model()


model_zoe_depth = None


def zoe_depth(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_zoe_depth
    if model_zoe_depth is None:
        from annotator.zoe import ZoeDetector
        model_zoe_depth = ZoeDetector()
    result = model_zoe_depth(img)
    return remove_pad(result), True


def unload_zoe_depth():
    global model_zoe_depth
    if model_zoe_depth is not None:
        model_zoe_depth.unload_model()


model_normal_bae = None


def normal_bae(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_normal_bae
    if model_normal_bae is None:
        from annotator.normalbae import NormalBaeDetector
        model_normal_bae = NormalBaeDetector()
    result = model_normal_bae(img)
    return remove_pad(result), True


def unload_normal_bae():
    global model_normal_bae
    if model_normal_bae is not None:
        model_normal_bae.unload_model()


model_oneformer_coco = None


def oneformer_coco(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_oneformer_coco
    if model_oneformer_coco is None:
        from annotator.oneformer import OneformerDetector
        model_oneformer_coco = OneformerDetector(OneformerDetector.configs["coco"])
    result = model_oneformer_coco(img)
    return remove_pad(result), True


def unload_oneformer_coco():
    global model_oneformer_coco
    if model_oneformer_coco is not None:
        model_oneformer_coco.unload_model()


model_oneformer_ade20k = None


def oneformer_ade20k(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_oneformer_ade20k
    if model_oneformer_ade20k is None:
        from annotator.oneformer import OneformerDetector
        model_oneformer_ade20k = OneformerDetector(OneformerDetector.configs["ade20k"])
    result = model_oneformer_ade20k(img)
    return remove_pad(result), True


def unload_oneformer_ade20k():
    global model_oneformer_ade20k
    if model_oneformer_ade20k is not None:
        model_oneformer_ade20k.unload_model()


model_shuffle = None


def shuffle(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    img = remove_pad(img)
    global model_shuffle
    if model_shuffle is None:
        from annotator.shuffle import ContentShuffleDetector
        model_shuffle = ContentShuffleDetector()
    result = model_shuffle(img)
    return result, True


def recolor_luminance(img, res=512, thr_a=1.0, **kwargs):
    result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2LAB)
    result = result[:, :, 0].astype(np.float32) / 255.0
    result = result ** thr_a
    result = (result * 255.0).clip(0, 255).astype(np.uint8)
    result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
    return result, True


def recolor_intensity(img, res=512, thr_a=1.0, **kwargs):
    result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2HSV)
    result = result[:, :, 2].astype(np.float32) / 255.0
    result = result ** thr_a
    result = (result * 255.0).clip(0, 255).astype(np.uint8)
    result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
    return result, True


def blur_gaussian(img, res=512, thr_a=1.0, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    img = remove_pad(img)
    result = cv2.GaussianBlur(img, (0, 0), float(thr_a))
    return result, True


model_anime_face_segment = None


def anime_face_segment(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_anime_face_segment
    if model_anime_face_segment is None:
        from annotator.anime_face_segment import AnimeFaceSegment
        model_anime_face_segment = AnimeFaceSegment()

    result = model_anime_face_segment(img)
    return remove_pad(result), True


def unload_anime_face_segment():
    global model_anime_face_segment
    if model_anime_face_segment is not None:
        model_anime_face_segment.unload_model()


def densepose(img, res=512, cmap="viridis", **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    from annotator.densepose import apply_densepose
    result = apply_densepose(img, cmap=cmap)
    return remove_pad(result), True


def unload_densepose():
    from annotator.densepose import unload_model
    unload_model()


model_te_hed = None


def te_hed(img, res=512, thr_a=2, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_te_hed
    if model_te_hed is None:
        from annotator.teed import TEEDDector  # sic: class name as spelled in annotator.teed
        model_te_hed = TEEDDector()
    result = model_te_hed(img, safe_steps=int(thr_a))
    return remove_pad(result), True


def unload_te_hed():
    if model_te_hed is not None:
        model_te_hed.unload_model()


class InsightFaceModel:
    def __init__(self):
        self.model = None

    def load_model(self):
        if self.model is None:
            from insightface.app import FaceAnalysis
            from annotator.annotator_path import models_path
            self.model = FaceAnalysis(
                name="buffalo_l",
                providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
                root=os.path.join(models_path, "insightface"),
            )
            self.model.prepare(ctx_id=0, det_size=(640, 640))

    def run_model(self, imgs: Union[Tuple[np.ndarray], np.ndarray], **kwargs):
        self.load_model()
        imgs = imgs if isinstance(imgs, tuple) else (imgs,)
        faceid_embeds = []
        for i, img in enumerate(imgs):
            img = HWC3(img)
            faces = self.model.get(img)
            if not faces:
                logger.warn(f"Insightface: No face found in image {i}.")
                continue
            if len(faces) > 1:
                logger.warn(f"Insightface: More than one face detected in image {i}. "
                            "Only the first one will be used.")
            faceid_embeds.append(torch.from_numpy(faces[0].normed_embedding).unsqueeze(0))
        return faceid_embeds, False


g_insight_face_model = InsightFaceModel()
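

# Usage sketch: `run_model` accepts a single image or a tuple of images and
# returns (embeddings, is_image=False); each entry should be a 1x512
# unit-normalized face embedding tensor (buffalo_l's embedding size):
#   embeds, _ = g_insight_face_model.run_model(img)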


def face_id_plus(img, low_vram=False, **kwargs):
    """FaceID Plus uses both the face embedding from insightface and the CLIP embedding from clip."""
    face_embed, _ = g_insight_face_model.run_model(img)
    clip_embed, _ = clip(img, config='clip_h', low_vram=low_vram)
    assert len(face_embed) > 0
    return (face_embed[0], clip_embed), False


class HandRefinerModel:
    def __init__(self):
        self.model = None
        self.device = devices.get_device_for("controlnet")

    def load_model(self):
        if self.model is None:
            from annotator.annotator_path import models_path
            from hand_refiner import MeshGraphormerDetector  # installed via hand_refiner_portable
            with Extra(torch_handler):
                self.model = MeshGraphormerDetector.from_pretrained(
                    "hr16/ControlNet-HandRefiner-pruned",
                    cache_dir=os.path.join(models_path, "hand_refiner"),
                    device=self.device,
                )
        else:
            self.model.to(self.device)

    def unload(self):
        if self.model is not None:
            self.model.to("cpu")

    def run_model(self, img, res=512, **kwargs):
        img, remove_pad = resize_image_with_pad(img, res)
        self.load_model()
        with Extra(torch_handler):
            depth_map, mask, info = self.model(
                img, output_type="np",
                detect_resolution=res,
                mask_bbox_padding=30,
            )
        return remove_pad(depth_map), True


g_hand_refiner_model = HandRefinerModel()
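

# Usage sketch: the shared instance follows the standard preprocessor contract,
# returning a hand depth map as the detect image:
#   depth_map, is_image = g_hand_refiner_model.run_model(img, res=512)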


model_free_preprocessors = [
    "reference_only",
    "reference_adain",
    "reference_adain+attn",
    "revision_clipvision",
    "revision_ignore_prompt",
]


no_control_mode_preprocessors = [
    "revision_clipvision",
    "revision_ignore_prompt",
    "clip_vision",
    "ip-adapter_clip_sd15",
    "ip-adapter_clip_sdxl",
    "ip-adapter_clip_sdxl_plus_vith",
    "t2ia_style_clipvision",
    "ip-adapter_face_id",
    "ip-adapter_face_id_plus",
]


flag_preprocessor_resolution = "Preprocessor Resolution"
preprocessor_sliders_config = {
    "none": [],
    "inpaint": [],
    "inpaint_only": [],
    "revision_clipvision": [
        None,
        {
            "name": "Noise Augmentation",
            "value": 0.0,
            "min": 0.0,
            "max": 1.0,
        },
    ],
    "revision_ignore_prompt": [
        None,
        {
            "name": "Noise Augmentation",
            "value": 0.0,
            "min": 0.0,
            "max": 1.0,
        },
    ],
    "canny": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Canny Low Threshold",
            "value": 100,
            "min": 1,
            "max": 255,
        },
        {
            "name": "Canny High Threshold",
            "value": 200,
            "min": 1,
            "max": 255,
        },
    ],
    "mlsd": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "MLSD Value Threshold",
            "value": 0.1,
            "min": 0.01,
            "max": 2.0,
            "step": 0.01,
        },
        {
            "name": "MLSD Distance Threshold",
            "value": 0.1,
            "min": 0.01,
            "max": 20.0,
            "step": 0.01,
        },
    ],
    "hed": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "scribble_hed": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "hed_safe": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "openpose": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "openpose_full": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "dw_openpose_full": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "animal_openpose": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "segmentation": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "depth": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "depth_leres": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Remove Near %",
            "value": 0,
            "min": 0,
            "max": 100,
            "step": 0.1,
        },
        {
            "name": "Remove Background %",
            "value": 0,
            "min": 0,
            "max": 100,
            "step": 0.1,
        },
    ],
    "depth_leres++": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Remove Near %",
            "value": 0,
            "min": 0,
            "max": 100,
            "step": 0.1,
        },
        {
            "name": "Remove Background %",
            "value": 0,
            "min": 0,
            "max": 100,
            "step": 0.1,
        },
    ],
    "normal_map": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Normal Background Threshold",
            "value": 0.4,
            "min": 0.0,
            "max": 1.0,
            "step": 0.01,
        },
    ],
    "threshold": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Binarization Threshold",
            "value": 127,
            "min": 0,
            "max": 255,
        },
    ],
    "scribble_xdog": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "XDoG Threshold",
            "value": 32,
            "min": 1,
            "max": 64,
        },
    ],
    "blur_gaussian": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Sigma",
            "value": 9.0,
            "min": 0.01,
            "max": 64.0,
        },
    ],
    "tile_resample": [
        None,
        {
            "name": "Down Sampling Rate",
            "value": 1.0,
            "min": 1.0,
            "max": 8.0,
            "step": 0.01,
        },
    ],
    "tile_colorfix": [
        None,
        {
            "name": "Variation",
            "value": 8.0,
            "min": 3.0,
            "max": 32.0,
            "step": 1.0,
        },
    ],
    "tile_colorfix+sharp": [
        None,
        {
            "name": "Variation",
            "value": 8.0,
            "min": 3.0,
            "max": 32.0,
            "step": 1.0,
        },
        {
            "name": "Sharpness",
            "value": 1.0,
            "min": 0.0,
            "max": 2.0,
            "step": 0.01,
        },
    ],
    "reference_only": [
        None,
        {
            "name": r'Style Fidelity (only for "Balanced" mode)',
            "value": 0.5,
            "min": 0.0,
            "max": 1.0,
            "step": 0.01,
        },
    ],
    "reference_adain": [
        None,
        {
            "name": r'Style Fidelity (only for "Balanced" mode)',
            "value": 0.5,
            "min": 0.0,
            "max": 1.0,
            "step": 0.01,
        },
    ],
    "reference_adain+attn": [
        None,
        {
            "name": r'Style Fidelity (only for "Balanced" mode)',
            "value": 0.5,
            "min": 0.0,
            "max": 1.0,
            "step": 0.01,
        },
    ],
    "inpaint_only+lama": [],
    "color": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "mediapipe_face": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Max Faces",
            "value": 1,
            "min": 1,
            "max": 10,
            "step": 1,
        },
        {
            "name": "Min Face Confidence",
            "value": 0.5,
            "min": 0.01,
            "max": 1.0,
            "step": 0.01,
        },
    ],
    "recolor_luminance": [
        None,
        {
            "name": "Gamma Correction",
            "value": 1.0,
            "min": 0.1,
            "max": 2.0,
            "step": 0.001,
        },
    ],
    "recolor_intensity": [
        None,
        {
            "name": "Gamma Correction",
            "value": 1.0,
            "min": 0.1,
            "max": 2.0,
            "step": 0.001,
        },
    ],
    "anime_face_segment": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "densepose": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "densepose_parula": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "depth_hand_refiner": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
    ],
    "te_hed": [
        {
            "name": flag_preprocessor_resolution,
            "value": 512,
            "min": 64,
            "max": 2048,
        },
        {
            "name": "Safe Steps",
            "value": 2,
            "min": 0,
            "max": 10,
            "step": 1,
        },
    ],
}
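

# Reading the slider metadata above (illustrative only; the UI code elsewhere
# in the extension is what actually consumes this dict). A `None` first slot
# appears on preprocessors that do not expose a resolution slider:
#   sliders = preprocessor_sliders_config["canny"]
#   low = next(s for s in sliders if s and s["name"] == "Canny Low Threshold")
#   assert low["value"] == 100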


preprocessor_filters = {
    "All": "none",
    "Canny": "canny",
    "Depth": "depth_midas",
    "NormalMap": "normal_bae",
    "OpenPose": "openpose_full",
    "MLSD": "mlsd",
    "Lineart": "lineart_standard (from white bg & black line)",
    "SoftEdge": "softedge_pidinet",
    "Scribble/Sketch": "scribble_pidinet",
    "Segmentation": "seg_ofade20k",
    "Shuffle": "shuffle",
    "Tile/Blur": "tile_resample",
    "Inpaint": "inpaint_only",
    "InstructP2P": "none",
    "Reference": "reference_only",
    "Recolor": "recolor_luminance",
    "Revision": "revision_clipvision",
    "T2I-Adapter": "none",
    "IP-Adapter": "ip-adapter_clip_sd15",
}


# Keys and aliases must be all lowercase.
preprocessor_filters_aliases = {
    'instructp2p': ['ip2p'],
    'segmentation': ['seg'],
    'normalmap': ['normal'],
    't2i-adapter': ['t2i_adapter', 't2iadapter', 't2ia'],
    'ip-adapter': ['ip_adapter', 'ipadapter'],
    'scribble/sketch': ['scribble', 'sketch'],
    'tile/blur': ['tile', 'blur'],
    'openpose': ['openpose', 'densepose'],
}
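

# Sketch of how a filter tag could be resolved through the aliases above
# (hypothetical helper; the real lookup lives in the UI code):
#   def resolve_filter(tag: str) -> str:
#       tag = tag.lower()
#       for canonical, aliases in preprocessor_filters_aliases.items():
#           if tag == canonical or tag in aliases:
#               return canonical
#       return tag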