diff --git a/main.py b/main.py
index 8f71248..673ba15 100644
--- a/main.py
+++ b/main.py
@@ -120,7 +120,7 @@ async def load_model(request: Request, data: ModelLoadRequest):
                 break
 
             if module == 0:
-                loading_bar: IncrementalBar = IncrementalBar("Modules", max = modules) 
+                loading_bar: IncrementalBar = IncrementalBar("Modules", max = modules)
             elif module == modules:
                 loading_bar.next()
                 loading_bar.finish()
@@ -215,8 +215,6 @@ async def load_model(data: LoraLoadRequest):
 # Unload lora endpoint
 @app.get("/v1/lora/unload", dependencies=[Depends(check_admin_key), Depends(_check_model_container)])
 async def unload_loras():
-    global model_container
-
     model_container.unload(True)
 
 # Encode tokens endpoint
diff --git a/model.py b/model.py
index 582b008..26af962 100644
--- a/model.py
+++ b/model.py
@@ -19,7 +19,6 @@ from utils import coalesce, unwrap
 auto_split_reserve_bytes = 96 * 1024**2
 
 class ModelContainer:
-
     config: Optional[ExLlamaV2Config] = None
     draft_config: Optional[ExLlamaV2Config] = None
     model: Optional[ExLlamaV2] = None
@@ -32,7 +31,7 @@ class ModelContainer:
     cache_fp8: bool = False
     gpu_split_auto: bool = True
     gpu_split: Optional[list] = None
-    
+
     active_loras: List[ExLlamaV2Lora] = []
 
     def __init__(self, model_directory: pathlib.Path, quiet = False, **kwargs):
@@ -108,7 +107,6 @@ class ModelContainer:
                 enable_draft = False
 
         if enable_draft:
-
             self.draft_config = ExLlamaV2Config()
             draft_model_path = pathlib.Path(unwrap(draft_args.get("draft_model_dir"), "models"))
             draft_model_path = draft_model_path / draft_model_name
@@ -124,7 +122,6 @@ class ModelContainer:
                 self.draft_config.max_input_len = kwargs["chunk_size"]
                 self.draft_config.max_attn_size = kwargs["chunk_size"] ** 2
 
-
     def calculate_rope_alpha(self, base_seq_len):
         ratio = self.config.max_seq_len / base_seq_len
 
@@ -136,7 +133,6 @@ class ModelContainer:
         model_path = pathlib.Path(self.config.model_dir)
         return model_path
 
-
     def load(self, progress_callback = None):
         """
         Load model
@@ -184,13 +180,10 @@ class ModelContainer:
         """
 
         # Load tokenizer
-
         self.tokenizer = ExLlamaV2Tokenizer(self.config)
 
         # Load draft model if a config is present
-
         if self.draft_config:
-
             self.draft_model = ExLlamaV2(self.draft_config)
             if not self.quiet:
                 print("Loading draft model: " + self.draft_config.model_dir)
@@ -200,12 +193,10 @@ class ModelContainer:
             yield from self.draft_model.load_autosplit_gen(self.draft_cache, reserve_vram = reserve, last_id_only = True, callback_gen = progress_callback)
 
             # Test VRAM allocation with a full-length forward pass
-
             input_ids = torch.zeros((1, self.config.max_input_len), dtype = torch.long)
             self.draft_model.forward(input_ids, cache = self.cache, preprocess_only = True)
 
         # Load model
-
         self.model = ExLlamaV2(self.config)
         if not self.quiet:
             print("Loading model: " + self.config.model_dir)
@@ -225,12 +216,10 @@ class ModelContainer:
             yield from self.model.load_autosplit_gen(self.cache, reserve_vram = reserve, last_id_only = True, callback_gen = progress_callback)
 
         # Test VRAM allocation with a full-length forward pass
-
         input_ids = torch.zeros((1, self.config.max_input_len), dtype = torch.long)
         self.model.forward(input_ids, cache = self.cache, preprocess_only = True)
 
         # Create generator
-
         self.generator = ExLlamaV2StreamingGenerator(self.model, self.cache, self.tokenizer, self.draft_model, self.draft_cache)
 
         print("Model successfully loaded.")
@@ -274,7 +263,6 @@ class ModelContainer:
         ids = torch.tensor([ids])
         return self.tokenizer.decode(ids, decode_special_tokens = unwrap(kwargs.get("decode_special_tokens"), True))[0]
 
-
     def generate(self, prompt: str, **kwargs):
         gen = list(self.generate_gen(prompt, **kwargs))
         reponse = "".join(map(lambda o: o[0], gen))
@@ -318,11 +306,9 @@ class ModelContainer:
         generate_window = min(unwrap(kwargs.get("generate_window"), 512), max_tokens)
 
         # Sampler settings
-
         gen_settings = ExLlamaV2Sampler.Settings()
 
         # Warn of unsupported settings if the setting is enabled
-
         if (unwrap(kwargs.get("mirostat"), False)) and not hasattr(gen_settings, "mirostat"):
             print(" !! Warning: Currently installed ExLlamaV2 does not support Mirostat sampling")
 
@@ -335,8 +321,7 @@ class ModelContainer:
         if (unwrap(kwargs.get("temperature_last"), False)) and not hasattr(gen_settings, "temperature_last"):
             print(" !! Warning: Currently installed ExLlamaV2 does not support temperature_last")
 
-        #Apply settings
-
+        # Apply settings
         gen_settings.temperature = unwrap(kwargs.get("temperature"), 1.0)
         gen_settings.temperature_last = unwrap(kwargs.get("temperature_last"), False)
         gen_settings.top_k = unwrap(kwargs.get("top_k"), 0)
@@ -363,14 +348,12 @@ class ModelContainer:
 
 
         # Ban the EOS token if specified. If not, append to stop conditions as well.
-
         if ban_eos_token:
             gen_settings.disallow_tokens(self.tokenizer, [self.tokenizer.eos_token_id])
         else:
             stop_conditions.append(self.tokenizer.eos_token_id)
 
         # Override sampler settings for temp = 0
-
         if gen_settings.temperature == 0:
             gen_settings.temperature = 1.0
             gen_settings.top_k = 1
@@ -378,11 +361,9 @@ class ModelContainer:
             gen_settings.typical = 0
 
         # Stop conditions
-
         self.generator.set_stop_conditions(stop_conditions)
 
         # Tokenized context
-
         ids = self.tokenizer.encode(
             prompt,
             add_bos = unwrap(kwargs.get("add_bos_token"), True),
@@ -399,7 +380,6 @@ class ModelContainer:
         prompt_tokens = ids.shape[-1]
 
         # Begin
-
         generated_tokens = 0
         full_response = ""
         start_time = time.time()
@@ -410,11 +390,8 @@ class ModelContainer:
         chunk_tokens = 0
 
         while True:
-
             # Ingest prompt
-
             if chunk_tokens == 0:
-
                 ids = torch.cat((ids, save_tokens), dim = - 1)
                 save_tokens = torch.empty((1, 0), dtype = torch.bool)
                 overflow = ids.shape[-1] + generate_window - self.config.max_seq_len
@@ -424,7 +401,6 @@ class ModelContainer:
                 self.generator.begin_stream(active_ids, gen_settings, token_healing = token_healing, loras = self.active_loras)
 
             # Generate
-
             chunk, eos, tokens = self.generator.stream()
 
             if token_healing:
@@ -438,7 +414,6 @@ class ModelContainer:
             chunk_tokens -= 1
 
             # Yield output
-
             now = time.time()
             elapsed = now - last_chunk_time