Added fuyu captioning

2026-04-30 03:01:28 +00:00 · 2023-10-25 14:14:53 -06:00
parent d742792ee4
commit 9636194c09
10 changed files with 240 additions and 87 deletions
--- a/extensions_built_in/dataset_tools/tools/image_tools.py
+++ b/extensions_built_in/dataset_tools/tools/image_tools.py
@@ -1,4 +1,4 @@
-from typing import Literal, Type
+from typing import Literal, Type, TYPE_CHECKING, Union

 import cv2
 import numpy as np
@@ -8,6 +8,14 @@ Step: Type = Literal['caption', 'caption_short', 'create_mask', 'contrast_stretc

 img_manipulation_steps = ['contrast_stretch']

+img_ext = ['.jpg', '.jpeg', '.png', '.webp']  # presumably the image file extensions the tools accept; verify against callers
+
+if TYPE_CHECKING:  # False at runtime, so these imports are only evaluated by static type checkers
+    from .llava_utils import LLaVAImageProcessor
+    from .fuyu_utils import FuyuImageProcessor
+
+ImageProcessor = Union['LLaVAImageProcessor', 'FuyuImageProcessor']  # string forward refs: the class names are not bound at runtime
+

 def pil_to_cv2(image):
    """Convert a PIL image to a cv2 image."""
@@ -18,6 +26,7 @@ def cv2_to_pil(image):
    """Convert a cv2 image to a PIL image."""
    return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

+
 def load_image(img_path: str):
    image = Image.open(img_path).convert('RGB')
    try:
@@ -27,3 +36,14 @@ def load_image(img_path: str):
        pass
    return image

+
+def resize_to_max(image, max_width=1024, max_height=1024):
+    """Downscale image to fit max_width x max_height, keeping aspect ratio."""
+    width, height = image.size
+    if width <= max_width and height <= max_height:
+        return image  # already fits; returned unchanged, never upscaled
+    scale = min(max_width / width, max_height / height)
+    # int() truncates; clamp to 1 px so extreme aspect ratios never reach 0
+    width = max(1, int(width * scale))
+    height = max(1, int(height * scale))
+    return image.resize((width, height), Image.LANCZOS)