Merge branch 'master' into racecond_fix

2026-03-05 05:00:18 +00:00 · 2022-12-03 10:19:51 +03:00
parent 39541d7725 5cd5a672f7
commit c9a2cfdf2a
93 changed files with 3589 additions and 8465 deletions
--- a/modules/textual_inversion/autocrop.py
+++ b/modules/textual_inversion/autocrop.py
@@ -276,8 +276,8 @@ def poi_average(pois, settings):
        weight += poi.weight
        x += poi.x * poi.weight
        y += poi.y * poi.weight
-    avg_x = round(x / weight)
-    avg_y = round(y / weight)
+    avg_x = round(weight and x / weight)
+    avg_y = round(weight and y / weight)

    return PointOfInterest(avg_x, avg_y)

@@ -338,4 +338,4 @@ class Settings:
    self.face_points_weight = face_points_weight
    self.annotate_image = annotate_image
    self.destop_view_image = False
-    self.dnn_model_path = dnn_model_path
+    self.dnn_model_path = dnn_model_path
--- a/modules/textual_inversion/dataset.py
+++ b/modules/textual_inversion/dataset.py
@@ -3,7 +3,7 @@ import numpy as np
 import PIL
 import torch
 from PIL import Image
-from torch.utils.data import Dataset
+from torch.utils.data import Dataset, DataLoader
 from torchvision import transforms

 import random
@@ -11,25 +11,28 @@ import tqdm
 from modules import devices, shared
 import re

+from ldm.modules.distributions.distributions import DiagonalGaussianDistribution
+
 re_numbers_at_start = re.compile(r"^[-\d]+\s*")


 class DatasetEntry:
-    def __init__(self, filename=None, latent=None, filename_text=None):
+    def __init__(self, filename=None, filename_text=None, latent_dist=None, latent_sample=None, cond=None, cond_text=None, pixel_values=None):
        self.filename = filename
-        self.latent = latent
        self.filename_text = filename_text
-        self.cond = None
-        self.cond_text = None
+        self.latent_dist = latent_dist
+        self.latent_sample = latent_sample
+        self.cond = cond
+        self.cond_text = cond_text
+        self.pixel_values = pixel_values


 class PersonalizedBase(Dataset):
-    def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False, batch_size=1):
+    def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, cond_model=None, device=None, template_file=None, include_cond=False, batch_size=1, gradient_step=1, shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once'):        
        re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex) > 0 else None
-
+        
        self.placeholder_token = placeholder_token

-        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.flip = transforms.RandomHorizontalFlip(p=flip_p)
@@ -45,11 +48,16 @@ class PersonalizedBase(Dataset):
        assert os.path.isdir(data_root), "Dataset directory doesn't exist"
        assert os.listdir(data_root), "Dataset directory is empty"

-        cond_model = shared.sd_model.cond_stage_model
-
        self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]
+
+        
+        self.shuffle_tags = shuffle_tags
+        self.tag_drop_out = tag_drop_out
+
        print("Preparing dataset...")
        for path in tqdm.tqdm(self.image_paths):
+            if shared.state.interrupted:
+                raise Exception("inturrupted")
            try:
                image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC)
            except Exception:
@@ -71,53 +79,94 @@ class PersonalizedBase(Dataset):
            npimage = np.array(image).astype(np.uint8)
            npimage = (npimage / 127.5 - 1.0).astype(np.float32)

-            torchdata = torch.from_numpy(npimage).to(device=device, dtype=torch.float32)
-            torchdata = torch.moveaxis(torchdata, 2, 0)
+            torchdata = torch.from_numpy(npimage).permute(2, 0, 1).to(device=device, dtype=torch.float32)
+            latent_sample = None

-            init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze()
-            init_latent = init_latent.to(devices.cpu)
+            with devices.autocast():
+                latent_dist = model.encode_first_stage(torchdata.unsqueeze(dim=0))

-            entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent)
+            if latent_sampling_method == "once" or (latent_sampling_method == "deterministic" and not isinstance(latent_dist, DiagonalGaussianDistribution)):
+                latent_sample = model.get_first_stage_encoding(latent_dist).squeeze().to(devices.cpu)
+                latent_sampling_method = "once"
+                entry = DatasetEntry(filename=path, filename_text=filename_text, latent_sample=latent_sample)
+            elif latent_sampling_method == "deterministic":
+                # Works only for DiagonalGaussianDistribution
+                latent_dist.std = 0
+                latent_sample = model.get_first_stage_encoding(latent_dist).squeeze().to(devices.cpu)
+                entry = DatasetEntry(filename=path, filename_text=filename_text, latent_sample=latent_sample)
+            elif latent_sampling_method == "random":
+                entry = DatasetEntry(filename=path, filename_text=filename_text, latent_dist=latent_dist)

-            if include_cond:
+            if not (self.tag_drop_out != 0 or self.shuffle_tags):
                entry.cond_text = self.create_text(filename_text)
-                entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)
+
+            if include_cond and not (self.tag_drop_out != 0 or self.shuffle_tags):
+                with devices.autocast():
+                    entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)

            self.dataset.append(entry)
+            del torchdata
+            del latent_dist
+            del latent_sample

-        assert len(self.dataset) > 0, "No images have been found in the dataset."
-        self.length = len(self.dataset) * repeats // batch_size
-
-        self.dataset_length = len(self.dataset)
-        self.indexes = None
-        self.shuffle()
-
-    def shuffle(self):
-        self.indexes = np.random.permutation(self.dataset_length)
+        self.length = len(self.dataset)
+        assert self.length > 0, "No images have been found in the dataset."
+        self.batch_size = min(batch_size, self.length)
+        self.gradient_step = min(gradient_step, self.length // self.batch_size)
+        self.latent_sampling_method = latent_sampling_method

    def create_text(self, filename_text):
        text = random.choice(self.lines)
+        tags = filename_text.split(',')
+        if self.tag_drop_out != 0:
+            tags = [t for t in tags if random.random() > self.tag_drop_out]
+        if self.shuffle_tags:
+            random.shuffle(tags)
+        text = text.replace("[filewords]", ','.join(tags))
        text = text.replace("[name]", self.placeholder_token)
-        text = text.replace("[filewords]", filename_text)
        return text

    def __len__(self):
        return self.length

    def __getitem__(self, i):
-        res = []
+        entry = self.dataset[i]
+        if self.tag_drop_out != 0 or self.shuffle_tags:
+            entry.cond_text = self.create_text(entry.filename_text)
+        if self.latent_sampling_method == "random":
+            entry.latent_sample = shared.sd_model.get_first_stage_encoding(entry.latent_dist).to(devices.cpu)
+        return entry

-        for j in range(self.batch_size):
-            position = i * self.batch_size + j
-            if position % len(self.indexes) == 0:
-                self.shuffle()
+class PersonalizedDataLoader(DataLoader):
+    def __init__(self, dataset, latent_sampling_method="once", batch_size=1, pin_memory=False):
+        super(PersonalizedDataLoader, self).__init__(dataset, shuffle=True, drop_last=True, batch_size=batch_size, pin_memory=pin_memory)
+        if latent_sampling_method == "random":
+            self.collate_fn = collate_wrapper_random
+        else:
+            self.collate_fn = collate_wrapper
+        

-            index = self.indexes[position % len(self.indexes)]
-            entry = self.dataset[index]
+class BatchLoader:
+    def __init__(self, data):
+        self.cond_text = [entry.cond_text for entry in data]
+        self.cond = [entry.cond for entry in data]
+        self.latent_sample = torch.stack([entry.latent_sample for entry in data]).squeeze(1)
+        #self.emb_index = [entry.emb_index for entry in data]
+        #print(self.latent_sample.device)

-            if entry.cond is None:
-                entry.cond_text = self.create_text(entry.filename_text)
+    def pin_memory(self):
+        self.latent_sample = self.latent_sample.pin_memory()
+        return self

-            res.append(entry)
+def collate_wrapper(batch):
+    return BatchLoader(batch)

-        return res
+class BatchLoaderRandom(BatchLoader):
+    def __init__(self, data):
+        super().__init__(data)
+
+    def pin_memory(self):
+        return self
+
+def collate_wrapper_random(batch):
+    return BatchLoaderRandom(batch)
--- a/modules/textual_inversion/preprocess.py
+++ b/modules/textual_inversion/preprocess.py
@@ -6,12 +6,10 @@ import sys
 import tqdm
 import time

-from modules import shared, images
+from modules import shared, images, deepbooru
 from modules.paths import models_path
 from modules.shared import opts, cmd_opts
 from modules.textual_inversion import autocrop
-if cmd_opts.deepdanbooru:
-    import modules.deepbooru as deepbooru


 def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False):
@@ -20,9 +18,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce
            shared.interrogator.load()

        if process_caption_deepbooru:
-            db_opts = deepbooru.create_deepbooru_opts()
-            db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
-            deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)
+            deepbooru.model.start()

        preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug)

@@ -32,9 +28,87 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce
            shared.interrogator.send_blip_to_ram()

        if process_caption_deepbooru:
-            deepbooru.release_process()
+            deepbooru.model.stop()


+def listfiles(dirname):
+    return os.listdir(dirname)
+
+
+class PreprocessParams:
+    src = None
+    dstdir = None
+    subindex = 0
+    flip = False
+    process_caption = False
+    process_caption_deepbooru = False
+    preprocess_txt_action = None
+
+
+def save_pic_with_caption(image, index, params: PreprocessParams, existing_caption=None):
+    caption = ""
+
+    if params.process_caption:
+        caption += shared.interrogator.generate_caption(image)
+
+    if params.process_caption_deepbooru:
+        if len(caption) > 0:
+            caption += ", "
+        caption += deepbooru.model.tag_multi(image)
+
+    filename_part = params.src
+    filename_part = os.path.splitext(filename_part)[0]
+    filename_part = os.path.basename(filename_part)
+
+    basename = f"{index:05}-{params.subindex}-{filename_part}"
+    image.save(os.path.join(params.dstdir, f"{basename}.png"))
+
+    if params.preprocess_txt_action == 'prepend' and existing_caption:
+        caption = existing_caption + ' ' + caption
+    elif params.preprocess_txt_action == 'append' and existing_caption:
+        caption = caption + ' ' + existing_caption
+    elif params.preprocess_txt_action == 'copy' and existing_caption:
+        caption = existing_caption
+
+    caption = caption.strip()
+
+    if len(caption) > 0:
+        with open(os.path.join(params.dstdir, f"{basename}.txt"), "w", encoding="utf8") as file:
+            file.write(caption)
+
+    params.subindex += 1
+
+
+def save_pic(image, index, params, existing_caption=None):
+    save_pic_with_caption(image, index, params, existing_caption=existing_caption)
+
+    if params.flip:
+        save_pic_with_caption(ImageOps.mirror(image), index, params, existing_caption=existing_caption)
+
+
+def split_pic(image, inverse_xy, width, height, overlap_ratio):
+    if inverse_xy:
+        from_w, from_h = image.height, image.width
+        to_w, to_h = height, width
+    else:
+        from_w, from_h = image.width, image.height
+        to_w, to_h = width, height
+    h = from_h * to_w // from_w
+    if inverse_xy:
+        image = image.resize((h, to_w))
+    else:
+        image = image.resize((to_w, h))
+
+    split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
+    y_step = (h - to_h) / (split_count - 1)
+    for i in range(split_count):
+        y = int(y_step * i)
+        if inverse_xy:
+            splitted = image.crop((y, 0, y + to_h, to_w))
+        else:
+            splitted = image.crop((0, y, to_w, y + to_h))
+        yield splitted
+

 def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False):
    width = process_width
@@ -48,82 +122,28 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre

    os.makedirs(dst, exist_ok=True)

-    files = os.listdir(src)
+    files = listfiles(src)

    shared.state.textinfo = "Preprocessing..."
    shared.state.job_count = len(files)

-    def save_pic_with_caption(image, index, existing_caption=None):
-        caption = ""
-
-        if process_caption:
-            caption += shared.interrogator.generate_caption(image)
-
-        if process_caption_deepbooru:
-            if len(caption) > 0:
-                caption += ", "
-            caption += deepbooru.get_tags_from_process(image)
-
-        filename_part = filename
-        filename_part = os.path.splitext(filename_part)[0]
-        filename_part = os.path.basename(filename_part)
-
-        basename = f"{index:05}-{subindex[0]}-{filename_part}"
-        image.save(os.path.join(dst, f"{basename}.png"))
-
-        if preprocess_txt_action == 'prepend' and existing_caption:
-            caption = existing_caption + ' ' + caption
-        elif preprocess_txt_action == 'append' and existing_caption:
-            caption = caption + ' ' + existing_caption
-        elif preprocess_txt_action == 'copy' and existing_caption:
-            caption = existing_caption
-
-        caption = caption.strip()
-        
-        if len(caption) > 0:
-            with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file:
-                file.write(caption)
-
-        subindex[0] += 1
-
-    def save_pic(image, index, existing_caption=None):
-        save_pic_with_caption(image, index, existing_caption=existing_caption)
-
-        if process_flip:
-            save_pic_with_caption(ImageOps.mirror(image), index, existing_caption=existing_caption)
-
-    def split_pic(image, inverse_xy):
-        if inverse_xy:
-            from_w, from_h = image.height, image.width
-            to_w, to_h = height, width
-        else:
-            from_w, from_h = image.width, image.height
-            to_w, to_h = width, height
-        h = from_h * to_w // from_w
-        if inverse_xy:
-            image = image.resize((h, to_w))
-        else:
-            image = image.resize((to_w, h))
-
-        split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
-        y_step = (h - to_h) / (split_count - 1)
-        for i in range(split_count):
-            y = int(y_step * i)
-            if inverse_xy:
-                splitted = image.crop((y, 0, y + to_h, to_w))
-            else:
-                splitted = image.crop((0, y, to_w, y + to_h))
-            yield splitted
-
+    params = PreprocessParams()
+    params.dstdir = dst
+    params.flip = process_flip
+    params.process_caption = process_caption
+    params.process_caption_deepbooru = process_caption_deepbooru
+    params.preprocess_txt_action = preprocess_txt_action

    for index, imagefile in enumerate(tqdm.tqdm(files)):
-        subindex = [0]
+        params.subindex = 0
        filename = os.path.join(src, imagefile)
        try:
            img = Image.open(filename).convert("RGB")
        except Exception:
            continue

+        params.src = filename
+
        existing_caption = None
        existing_caption_filename = os.path.splitext(filename)[0] + '.txt'
        if os.path.exists(existing_caption_filename):
@@ -143,8 +163,8 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
        process_default_resize = True

        if process_split and ratio < 1.0 and ratio <= split_threshold:
-            for splitted in split_pic(img, inverse_xy):
-                save_pic(splitted, index, existing_caption=existing_caption)
+            for splitted in split_pic(img, inverse_xy, width, height, overlap_ratio):
+                save_pic(splitted, index, params, existing_caption=existing_caption)
            process_default_resize = False

        if process_focal_crop and img.height != img.width:
@@ -165,11 +185,11 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
                dnn_model_path = dnn_model_path,
            )
            for focal in autocrop.crop_image(img, autocrop_settings):
-                save_pic(focal, index, existing_caption=existing_caption)
+                save_pic(focal, index, params, existing_caption=existing_caption)
            process_default_resize = False

        if process_default_resize:
            img = images.resize_image(1, img, width, height)
-            save_pic(img, index, existing_caption=existing_caption)
+            save_pic(img, index, params, existing_caption=existing_caption)

-        shared.state.nextjob()
+        shared.state.nextjob()
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -10,7 +10,7 @@ import csv

 from PIL import Image, PngImagePlugin

-from modules import shared, devices, sd_hijack, processing, sd_models, images
+from modules import shared, devices, sd_hijack, processing, sd_models, images, sd_samplers
 import modules.textual_inversion.dataset
 from modules.textual_inversion.learn_schedule import LearnRateScheduler

@@ -64,7 +64,8 @@ class EmbeddingDatabase:

        self.word_embeddings[embedding.name] = embedding

-        ids = model.cond_stage_model.tokenizer([embedding.name], add_special_tokens=False)['input_ids'][0]
+        # TODO changing between clip and open clip changes tokenization, which will cause embeddings to stop working
+        ids = model.cond_stage_model.tokenize([embedding.name])[0]

        first_id = ids[0]
        if first_id not in self.ids_lookup:
@@ -155,13 +156,11 @@ class EmbeddingDatabase:

 def create_embedding(name, num_vectors_per_token, overwrite_old, init_text='*'):
    cond_model = shared.sd_model.cond_stage_model
-    embedding_layer = cond_model.wrapped.transformer.text_model.embeddings

    with devices.autocast():
        cond_model([""])  # will send cond model to GPU if lowvram/medvram is active

-    ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"]
-    embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
+    embedded = cond_model.encode_embedding_init_text(init_text, num_vectors_per_token)
    vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device)

    for i in range(num_vectors_per_token):
@@ -184,7 +183,7 @@ def write_loss(log_directory, filename, step, epoch_len, values):
    if shared.opts.training_write_csv_every == 0:
        return

-    if (step + 1) % shared.opts.training_write_csv_every != 0:
+    if step % shared.opts.training_write_csv_every != 0:
        return
    write_csv_header = False if os.path.exists(os.path.join(log_directory, filename)) else True

@@ -194,21 +193,23 @@ def write_loss(log_directory, filename, step, epoch_len, values):
        if write_csv_header:
            csv_writer.writeheader()

-        epoch = step // epoch_len
-        epoch_step = step % epoch_len 
+        epoch = (step - 1) // epoch_len
+        epoch_step = (step - 1) % epoch_len 

        csv_writer.writerow({
-            "step": step + 1,
+            "step": step,
            "epoch": epoch,
-            "epoch_step": epoch_step + 1,
+            "epoch_step": epoch_step,
            **values,
        })

-def validate_train_inputs(model_name, learn_rate, batch_size, data_root, template_file, steps, save_model_every, create_image_every, log_directory, name="embedding"):
+def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_model_every, create_image_every, log_directory, name="embedding"):
    assert model_name, f"{name} not selected"
    assert learn_rate, "Learning rate is empty or 0"
    assert isinstance(batch_size, int), "Batch size must be integer"
    assert batch_size > 0, "Batch size must be positive"
+    assert isinstance(gradient_step, int), "Gradient accumulation step must be integer"
+    assert gradient_step > 0, "Gradient accumulation step must be positive"
    assert data_root, "Dataset directory is empty"
    assert os.path.isdir(data_root), "Dataset directory doesn't exist"
    assert os.listdir(data_root), "Dataset directory is empty"
@@ -224,10 +225,10 @@ def validate_train_inputs(model_name, learn_rate, batch_size, data_root, templat
    if save_model_every or create_image_every:
        assert log_directory, "Log directory is empty"

-def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
    save_embedding_every = save_embedding_every or 0
    create_image_every = create_image_every or 0
-    validate_train_inputs(embedding_name, learn_rate, batch_size, data_root, template_file, steps, save_embedding_every, create_image_every, log_directory, name="embedding")
+    validate_train_inputs(embedding_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_embedding_every, create_image_every, log_directory, name="embedding")

    shared.state.textinfo = "Initializing textual inversion training..."
    shared.state.job_count = steps
@@ -255,166 +256,203 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc
    else:
        images_embeds_dir = None

-    cond_model = shared.sd_model.cond_stage_model
-
    hijack = sd_hijack.model_hijack

    embedding = hijack.embedding_db.word_embeddings[embedding_name]
    checkpoint = sd_models.select_checkpoint()

-    ititial_step = embedding.step or 0
-    if ititial_step >= steps:
+    initial_step = embedding.step or 0
+    if initial_step >= steps:
        shared.state.textinfo = f"Model has already been trained beyond specified max steps"
        return embedding, filename
+    scheduler = LearnRateScheduler(learn_rate, steps, initial_step)

-    scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
-
-    # dataset loading may take a while, so input validations and early returns should be done before this
+   # dataset loading may take a while, so input validations and early returns should be done before this
    shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
-    with torch.autocast("cuda"):
-        ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file, batch_size=batch_size)
-
    old_parallel_processing_allowed = shared.parallel_processing_allowed
+    
+    pin_memory = shared.opts.pin_memory
+    
+    ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
+
+    latent_sampling_method = ds.latent_sampling_method
+
+    dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory)

    if unload:
        shared.parallel_processing_allowed = False
        shared.sd_model.first_stage_model.to(devices.cpu)

    embedding.vec.requires_grad = True
-    optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate)
+    optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate, weight_decay=0.0)
+    scaler = torch.cuda.amp.GradScaler()

-    losses = torch.zeros((32,))
+    batch_size = ds.batch_size
+    gradient_step = ds.gradient_step
+    # n steps = batch_size * gradient_step * n image processed
+    steps_per_epoch = len(ds) // batch_size // gradient_step
+    max_steps_per_epoch = len(ds) // batch_size - (len(ds) // batch_size) % gradient_step
+    loss_step = 0
+    _loss_step = 0 #internal

+    
    last_saved_file = "<none>"
    last_saved_image = "<none>"
    forced_filename = "<none>"
    embedding_yet_to_be_embedded = False
+    
+    pbar = tqdm.tqdm(total=steps - initial_step)
+    try:
+        for i in range((steps-initial_step) * gradient_step):
+            if scheduler.finished:
+                break
+            if shared.state.interrupted:
+                break
+            for j, batch in enumerate(dl):
+                # works as a drop_last=True for gradient accumulation
+                if j == max_steps_per_epoch:
+                    break
+                scheduler.apply(optimizer, embedding.step)
+                if scheduler.finished:
+                    break
+                if shared.state.interrupted:
+                    break

-    pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step)
-    for i, entries in pbar:
-        embedding.step = i + ititial_step
+                with devices.autocast():
+                    # c = stack_conds(batch.cond).to(devices.device)
+                    # mask = torch.tensor(batch.emb_index).to(devices.device, non_blocking=pin_memory)
+                    # print(mask)
+                    # c[:, 1:1+embedding.vec.shape[0]] = embedding.vec.to(devices.device, non_blocking=pin_memory)
+                    x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
+                    c = shared.sd_model.cond_stage_model(batch.cond_text)
+                    loss = shared.sd_model(x, c)[0] / gradient_step
+                    del x
+                    
+                    _loss_step += loss.item()
+                scaler.scale(loss).backward()
+                
+                # go back until we reach gradient accumulation steps
+                if (j + 1) % gradient_step != 0:
+                    continue
+                scaler.step(optimizer)
+                scaler.update()
+                embedding.step += 1
+                pbar.update()
+                optimizer.zero_grad(set_to_none=True)
+                loss_step = _loss_step
+                _loss_step = 0

-        scheduler.apply(optimizer, embedding.step)
-        if scheduler.finished:
-            break
+                steps_done = embedding.step + 1

-        if shared.state.interrupted:
-            break
+                epoch_num = embedding.step // steps_per_epoch
+                epoch_step = embedding.step % steps_per_epoch

-        with torch.autocast("cuda"):
-            c = cond_model([entry.cond_text for entry in entries])
-            x = torch.stack([entry.latent for entry in entries]).to(devices.device)
-            loss = shared.sd_model(x, c)[0]
-            del x
+                pbar.set_description(f"[Epoch {epoch_num}: {epoch_step+1}/{steps_per_epoch}]loss: {loss_step:.7f}")
+                if embedding_dir is not None and steps_done % save_embedding_every == 0:
+                    # Before saving, change name to match current checkpoint.
+                    embedding_name_every = f'{embedding_name}-{steps_done}'
+                    last_saved_file = os.path.join(embedding_dir, f'{embedding_name_every}.pt')
+                    #if shared.opts.save_optimizer_state:
+                        #embedding.optimizer_state_dict = optimizer.state_dict()
+                    save_embedding(embedding, checkpoint, embedding_name_every, last_saved_file, remove_cached_checksum=True)
+                    embedding_yet_to_be_embedded = True

-            losses[embedding.step % losses.shape[0]] = loss.item()
+                write_loss(log_directory, "textual_inversion_loss.csv", embedding.step, steps_per_epoch, {
+                    "loss": f"{loss_step:.7f}",
+                    "learn_rate": scheduler.learn_rate
+                })

-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
+                if images_dir is not None and steps_done % create_image_every == 0:
+                    forced_filename = f'{embedding_name}-{steps_done}'
+                    last_saved_image = os.path.join(images_dir, forced_filename)

-        steps_done = embedding.step + 1
+                    shared.sd_model.first_stage_model.to(devices.device)

-        epoch_num = embedding.step // len(ds)
-        epoch_step = embedding.step % len(ds)
+                    p = processing.StableDiffusionProcessingTxt2Img(
+                        sd_model=shared.sd_model,
+                        do_not_save_grid=True,
+                        do_not_save_samples=True,
+                        do_not_reload_embeddings=True,
+                    )

-        pbar.set_description(f"[Epoch {epoch_num}: {epoch_step+1}/{len(ds)}]loss: {losses.mean():.7f}")
+                    if preview_from_txt2img:
+                        p.prompt = preview_prompt
+                        p.negative_prompt = preview_negative_prompt
+                        p.steps = preview_steps
+                        p.sampler_name = sd_samplers.samplers[preview_sampler_index].name
+                        p.cfg_scale = preview_cfg_scale
+                        p.seed = preview_seed
+                        p.width = preview_width
+                        p.height = preview_height
+                    else:
+                        p.prompt = batch.cond_text[0]
+                        p.steps = 20
+                        p.width = training_width
+                        p.height = training_height

-        if embedding_dir is not None and steps_done % save_embedding_every == 0:
-            # Before saving, change name to match current checkpoint.
-            embedding_name_every = f'{embedding_name}-{steps_done}'
-            last_saved_file = os.path.join(embedding_dir, f'{embedding_name_every}.pt')
-            save_embedding(embedding, checkpoint, embedding_name_every, last_saved_file, remove_cached_checksum=True)
-            embedding_yet_to_be_embedded = True
+                    preview_text = p.prompt

-        write_loss(log_directory, "textual_inversion_loss.csv", embedding.step, len(ds), {
-            "loss": f"{losses.mean():.7f}",
-            "learn_rate": scheduler.learn_rate
-        })
+                    processed = processing.process_images(p)
+                    image = processed.images[0] if len(processed.images) > 0 else None

-        if images_dir is not None and steps_done % create_image_every == 0:
-            forced_filename = f'{embedding_name}-{steps_done}'
-            last_saved_image = os.path.join(images_dir, forced_filename)
+                    if unload:
+                        shared.sd_model.first_stage_model.to(devices.cpu)

-            shared.sd_model.first_stage_model.to(devices.device)
+                    if image is not None:
+                        shared.state.current_image = image
+                        last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
+                        last_saved_image += f", prompt: {preview_text}"

-            p = processing.StableDiffusionProcessingTxt2Img(
-                sd_model=shared.sd_model,
-                do_not_save_grid=True,
-                do_not_save_samples=True,
-                do_not_reload_embeddings=True,
-            )
+                    if save_image_with_stored_embedding and os.path.exists(last_saved_file) and embedding_yet_to_be_embedded:

-            if preview_from_txt2img:
-                p.prompt = preview_prompt
-                p.negative_prompt = preview_negative_prompt
-                p.steps = preview_steps
-                p.sampler_index = preview_sampler_index
-                p.cfg_scale = preview_cfg_scale
-                p.seed = preview_seed
-                p.width = preview_width
-                p.height = preview_height
-            else:
-                p.prompt = entries[0].cond_text
-                p.steps = 20
-                p.width = training_width
-                p.height = training_height
+                        last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{steps_done}.png')

-            preview_text = p.prompt
+                        info = PngImagePlugin.PngInfo()
+                        data = torch.load(last_saved_file)
+                        info.add_text("sd-ti-embedding", embedding_to_b64(data))

-            processed = processing.process_images(p)
-            image = processed.images[0]
+                        title = "<{}>".format(data.get('name', '???'))

-            if unload:
-                shared.sd_model.first_stage_model.to(devices.cpu)
+                        try:
+                            vectorSize = list(data['string_to_param'].values())[0].shape[0]
+                        except Exception as e:
+                            vectorSize = '?'

-            shared.state.current_image = image
+                        checkpoint = sd_models.select_checkpoint()
+                        footer_left = checkpoint.model_name
+                        footer_mid = '[{}]'.format(checkpoint.hash)
+                        footer_right = '{}v {}s'.format(vectorSize, steps_done)

-            if save_image_with_stored_embedding and os.path.exists(last_saved_file) and embedding_yet_to_be_embedded:
+                        captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right)
+                        captioned_image = insert_image_data_embed(captioned_image, data)

-                last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{steps_done}.png')
+                        captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info)
+                        embedding_yet_to_be_embedded = False

-                info = PngImagePlugin.PngInfo()
-                data = torch.load(last_saved_file)
-                info.add_text("sd-ti-embedding", embedding_to_b64(data))
+                    last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
+                    last_saved_image += f", prompt: {preview_text}"

-                title = "<{}>".format(data.get('name', '???'))
+                shared.state.job_no = embedding.step

-                try:
-                    vectorSize = list(data['string_to_param'].values())[0].shape[0]
-                except Exception as e:
-                    vectorSize = '?'
-
-                checkpoint = sd_models.select_checkpoint()
-                footer_left = checkpoint.model_name
-                footer_mid = '[{}]'.format(checkpoint.hash)
-                footer_right = '{}v {}s'.format(vectorSize, steps_done)
-
-                captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right)
-                captioned_image = insert_image_data_embed(captioned_image, data)
-
-                captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info)
-                embedding_yet_to_be_embedded = False
-
-            last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
-            last_saved_image += f", prompt: {preview_text}"
-
-        shared.state.job_no = embedding.step
-
-        shared.state.textinfo = f"""
+                shared.state.textinfo = f"""
 <p>
-Loss: {losses.mean():.7f}<br/>
-Step: {embedding.step}<br/>
-Last prompt: {html.escape(entries[0].cond_text)}<br/>
+Loss: {loss_step:.7f}<br/>
+Step: {steps_done}<br/>
+Last prompt: {html.escape(batch.cond_text[0])}<br/>
 Last saved embedding: {html.escape(last_saved_file)}<br/>
 Last saved image: {html.escape(last_saved_image)}<br/>
 </p>
 """
-
-    filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt')
-    save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True)
-    shared.sd_model.first_stage_model.to(devices.device)
-    shared.parallel_processing_allowed = old_parallel_processing_allowed
+        filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt')
+        save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True)
+    except Exception:
+        print(traceback.format_exc(), file=sys.stderr)
+        pass
+    finally:
+        pbar.leave = False
+        pbar.close()
+        shared.sd_model.first_stage_model.to(devices.device)
+        shared.parallel_processing_allowed = old_parallel_processing_allowed

    return embedding, filename

--- a/modules/textual_inversion/ui.py
+++ b/modules/textual_inversion/ui.py
@@ -18,7 +18,7 @@ def create_embedding(name, initialization_text, nvpt, overwrite_old):
 def preprocess(*args):
    modules.textual_inversion.preprocess.preprocess(*args)

-    return "Preprocessing finished.", ""
+    return f"Preprocessing {'interrupted' if shared.state.interrupted else 'finished'}.", ""


 def train_embedding(*args):