From 9ceb58685a7639b1a6a6c6ca2ab7a7b9d750cf6f Mon Sep 17 00:00:00 2001 From: Collin Avidano Date: Wed, 25 Sep 2024 07:12:28 -0400 Subject: [PATCH] Feature: Filter k most confident masks (#720) * add filter to things that return confidences need to add ui elements to select between the two methods * add ui elements for controlling method * forgot to remove this * fix incorrect early exit * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: pop mask only top k params * fix: filter confidences * refactor: change to one public function --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Dowon --- aaaaaa/ui.py | 13 ++++++++++--- adetailer/args.py | 12 +++++++++--- adetailer/common.py | 1 + adetailer/mask.py | 23 +++++++++++++++++++++++ adetailer/mediapipe.py | 7 +++++-- adetailer/ultralytics.py | 7 ++++++- scripts/!adetailer.py | 4 ++-- 7 files changed, 56 insertions(+), 11 deletions(-) diff --git a/aaaaaa/ui.py b/aaaaaa/ui.py index 4b9b484..8a4c19c 100644 --- a/aaaaaa/ui.py +++ b/aaaaaa/ui.py @@ -294,14 +294,21 @@ def detection(w: Widgets, n: int, is_img2img: bool): visible=True, elem_id=eid("ad_confidence"), ) - w.ad_mask_k_largest = gr.Slider( - label="Mask only the top k largest (0 to disable)" + suffix(n), + w.ad_mask_filter_method = gr.Radio( + choices=["Area", "Confidence"], + value="Area", + label="Method to filter top k masks by (confidence or area)", + visible=True, + elem_id=eid("ad_mask_filter_method"), + ) + w.ad_mask_k = gr.Slider( + label="Mask only the top k (0 to disable)" + suffix(n), minimum=0, maximum=10, step=1, value=0, visible=True, - elem_id=eid("ad_mask_k_largest"), + elem_id=eid("ad_mask_k"), ) with gr.Column(variant="compact"): diff --git a/adetailer/args.py b/adetailer/args.py index 4efcdd7..e1b8751 100644 --- a/adetailer/args.py +++ b/adetailer/args.py @@ -60,7 +60,8 @@ class ADetailerArgs(BaseModel, extra=Extra.forbid): ad_prompt: str = "" ad_negative_prompt: str = "" ad_confidence: confloat(ge=0.0, le=1.0) = 0.3 - ad_mask_k_largest: NonNegativeInt = 0 + ad_mask_filter_method: Literal["Area", "Confidence"] = "Area" + ad_mask_k: NonNegativeInt = 0 ad_mask_min_ratio: confloat(ge=0.0, le=1.0) = 0.0 ad_mask_max_ratio: confloat(ge=0.0, le=1.0) = 1.0 ad_dilate_erode: int = 4 @@ -131,7 +132,11 @@ class ADetailerArgs(BaseModel, extra=Extra.forbid): ppop("ADetailer prompt") ppop("ADetailer negative prompt") p.pop("ADetailer tab enable", None) # always pop - ppop("ADetailer mask only top k largest", cond=0) + ppop( + "ADetailer mask only top k", + ["ADetailer mask only top k", "ADetailer method to decide top k masks"], + cond=0, + ) ppop("ADetailer mask min ratio", cond=0.0) ppop("ADetailer mask max ratio", cond=1.0) ppop("ADetailer x offset", cond=0) @@ -217,7 +222,8 @@ _all_args = [ ("ad_prompt", "ADetailer prompt"), ("ad_negative_prompt", "ADetailer negative prompt"), ("ad_confidence", "ADetailer confidence"), - ("ad_mask_k_largest", "ADetailer mask only top k largest"), + ("ad_mask_filter_method", "ADetailer method to decide top k masks"), + ("ad_mask_k", "ADetailer mask only top k"), ("ad_mask_min_ratio", "ADetailer mask min ratio"), ("ad_mask_max_ratio", "ADetailer mask max ratio"), ("ad_x_offset", "ADetailer x offset"), diff --git a/adetailer/common.py b/adetailer/common.py index 0a4fb7a..ca6415a 100644 --- a/adetailer/common.py +++ b/adetailer/common.py @@ -22,6 +22,7 @@ T = TypeVar("T", int, float) class PredictOutput(Generic[T]): bboxes: list[list[T]] = field(default_factory=list) masks: list[Image.Image] = field(default_factory=list) + confidences: list[float] = field(default_factory=list) preview: Optional[Image.Image] = None diff --git a/adetailer/mask.py b/adetailer/mask.py index 9496aa4..65388c5 100644 --- a/adetailer/mask.py +++ b/adetailer/mask.py @@ -225,6 +225,7 @@ def filter_by_ratio( idx = [i for i in range(items) if is_in_ratio(pred.bboxes[i], low, high, orig_area)] pred.bboxes = [pred.bboxes[i] for i in idx] pred.masks = [pred.masks[i] for i in idx] + pred.confidences = [pred.confidences[i] for i in idx] return pred @@ -236,9 +237,31 @@ def filter_k_largest(pred: PredictOutput[T], k: int = 0) -> PredictOutput[T]: idx = idx[::-1] pred.bboxes = [pred.bboxes[i] for i in idx] pred.masks = [pred.masks[i] for i in idx] + pred.confidences = [pred.confidences[i] for i in idx] return pred +def filter_k_most_confident(pred: PredictOutput[T], k: int = 0) -> PredictOutput[T]: + if not pred.bboxes or not pred.confidences or k == 0: + return pred + idx = np.argsort(pred.confidences)[-k:] + idx = idx[::-1] + pred.bboxes = [pred.bboxes[i] for i in idx] + pred.masks = [pred.masks[i] for i in idx] + pred.confidences = [pred.confidences[i] for i in idx] + return pred + + +def filter_k_by( + pred: PredictOutput[T], k: int = 0, by: str = "Area" +) -> PredictOutput[T]: + if by == "Area": + return filter_k_largest(pred, k) + if by == "Confidence": + return filter_k_most_confident(pred, k) + raise RuntimeError + + # Merge / Invert def mask_merge(masks: list[Image.Image]) -> list[Image.Image]: arrs = [np.array(m) for m in masks] diff --git a/adetailer/mediapipe.py b/adetailer/mediapipe.py index b05fa00..067aa53 100644 --- a/adetailer/mediapipe.py +++ b/adetailer/mediapipe.py @@ -52,6 +52,7 @@ def mediapipe_face_detection( preview_array = img_array.copy() bboxes = [] + confidences = [] for detection in pred.detections: draw_util.draw_detection(preview_array, detection) @@ -63,12 +64,15 @@ def mediapipe_face_detection( x2 = x1 + w y2 = y1 + h + confidences.append(detection.score) bboxes.append([x1, y1, x2, y2]) masks = create_mask_from_bbox(bboxes, image.size) preview = Image.fromarray(preview_array) - return PredictOutput(bboxes=bboxes, masks=masks, preview=preview) + return PredictOutput( + bboxes=bboxes, masks=masks, confidences=confidences, preview=preview + ) def mediapipe_face_mesh( @@ -141,7 +145,6 @@ def mediapipe_face_mesh_eyes_only( preview = image.copy() masks = [] - for landmarks in pred.multi_face_landmarks: points = np.array( [[land.x * w, land.y * h] for land in landmarks.landmark], dtype=int diff --git a/adetailer/ultralytics.py b/adetailer/ultralytics.py index dc93482..7c7a1a7 100644 --- a/adetailer/ultralytics.py +++ b/adetailer/ultralytics.py @@ -37,11 +37,16 @@ def ultralytics_predict( masks = create_mask_from_bbox(bboxes, image.size) else: masks = mask_to_pil(pred[0].masks.data, image.size) + + confidences = pred[0].boxes.conf.cpu().numpy().tolist() + preview = pred[0].plot() preview = cv2.cvtColor(preview, cv2.COLOR_BGR2RGB) preview = Image.fromarray(preview) - return PredictOutput(bboxes=bboxes, masks=masks, preview=preview) + return PredictOutput( + bboxes=bboxes, masks=masks, confidences=confidences, preview=preview + ) def apply_classes(model: YOLO | YOLOWorld, model_path: str | Path, classes: str): diff --git a/scripts/!adetailer.py b/scripts/!adetailer.py index 52baf15..ed7f4de 100644 --- a/scripts/!adetailer.py +++ b/scripts/!adetailer.py @@ -52,7 +52,7 @@ from adetailer.args import ( from adetailer.common import PredictOutput, ensure_pil_image, safe_mkdir from adetailer.mask import ( filter_by_ratio, - filter_k_largest, + filter_k_by, has_intersection, is_all_black, mask_preprocess, @@ -596,7 +596,7 @@ class AfterDetailerScript(scripts.Script): pred = filter_by_ratio( pred, low=args.ad_mask_min_ratio, high=args.ad_mask_max_ratio ) - pred = filter_k_largest(pred, k=args.ad_mask_k_largest) + pred = filter_k_by(pred, k=args.ad_mask_k, by=args.ad_mask_filter_method) pred = self.sort_bboxes(pred) masks = mask_preprocess( pred.masks,