Added a file signature check on the dataset size caching system to invalidate cached dimensions if the file changes.

This commit is contained in:
Jaret Burkett
2025-04-01 07:39:36 -06:00
parent 5ea19b6292
commit 3d131fb27a
3 changed files with 25 additions and 3 deletions

View File

@@ -10,6 +10,7 @@ from PIL import Image
from PIL.ImageOps import exif_transpose
from toolkit import image_utils
from toolkit.basic import get_quick_signature_string
from toolkit.dataloader_mixins import CaptionProcessingDTOMixin, ImageProcessingDTOMixin, LatentCachingFileItemDTOMixin, \
ControlFileItemDTOMixin, ArgBreakMixin, PoiFileItemDTOMixin, MaskFileItemDTOMixin, AugmentationFileItemDTOMixin, \
UnconditionalFileItemDTOMixin, ClipImageFileItemDTOMixin, InpaintControlFileItemDTOMixin
@@ -53,8 +54,19 @@ class FileItemDTO(
file_key = self.path.replace(dataset_root, '')
else:
file_key = os.path.basename(self.path)
# Take a quick signature of the file; it is compared against the signature
# stored alongside the cached (w, h) entry to decide whether the cache is usable.
file_signature = get_quick_signature_string(self.path)
if file_signature is None:
    # The f-prefix is required here: without it the message would contain the
    # literal text "{self.path}" instead of the path that failed.
    raise Exception(f"Error: Could not get file signature for {self.path}")
use_db_entry = False
if file_key in size_database:
w, h = size_database[file_key]
db_entry = size_database[file_key]
if db_entry is not None and db_entry[2] == file_signature:
use_db_entry = True
if use_db_entry:
w, h, _ = size_database[file_key]
elif self.is_video:
# Open the video file
video = cv2.VideoCapture(self.path)
@@ -80,7 +92,7 @@ class FileItemDTO(
# f'This process is faster for png, jpeg')
img = exif_transpose(Image.open(self.path))
w, h = img.size
size_database[file_key] = (w, h)
size_database[file_key] = (w, h, file_signature)
self.width: int = w
self.height: int = h
self.dataloader_transforms = kwargs.get('dataloader_transforms', None)