Added some more useful error handling and logging

This commit is contained in:
Jaret Burkett
2025-04-07 08:01:37 -06:00
parent 7c21eac1b3
commit 6c8b5ab606
2 changed files with 53 additions and 26 deletions

View File

@@ -7,6 +7,7 @@ import shutil
from collections import OrderedDict
import os
import re
import traceback
from typing import Union, List, Optional
import numpy as np
@@ -2008,7 +2009,17 @@ class BaseSDTrainProcess(BaseTrainProcess):
# flush()
### HOOK ###
with self.accelerator.accumulate(self.modules_being_trained):
loss_dict = self.hook_train_loop(batch_list)
try:
loss_dict = self.hook_train_loop(batch_list)
except Exception as e:
traceback.print_exc()
#print batch info
print("Batch Items:")
for batch in batch_list:
for item in batch.file_items:
print(f" - {item.path}")
raise e
self.timer.stop('train_loop')
if not did_first_flush:
flush()