From 9e6bbd9b8931bbe869a8e28e7005b0e13c2efff0 Mon Sep 17 00:00:00 2001
From: pharmapsychotic
Date: Mon, 27 Mar 2023 22:20:44 -0500
Subject: [PATCH] Switch to clip-interrogator 0.5.4 to be compatible with transformers==4.25.1

---
 install.py                       |  2 +-
 scripts/clip_interrogator_ext.py | 51 ++++++++++++++------------------
 2 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/install.py b/install.py
index 2c8d851..a88362a 100644
--- a/install.py
+++ b/install.py
@@ -1,6 +1,6 @@
 import launch
 
-CI_VERSION = "0.6.0"
+CI_VERSION = "0.5.4"
 
 needs_install = False
 try:
diff --git a/scripts/clip_interrogator_ext.py b/scripts/clip_interrogator_ext.py
index 5b367d9..fc68a61 100644
--- a/scripts/clip_interrogator_ext.py
+++ b/scripts/clip_interrogator_ext.py
@@ -7,11 +7,11 @@
 import torch
 
 from PIL import Image
 import clip_interrogator
-from clip_interrogator import Config, Interrogator, list_caption_models, list_clip_models
+from clip_interrogator import Config, Interrogator
 from modules import devices, lowvram, script_callbacks, shared
 
-__version__ = '0.1.3'
+__version__ = '0.1.4'
 
 ci = None
 low_vram = False
@@ -49,23 +49,21 @@ class BatchWriter:
         self.file.close()
 
 
-def load(clip_model_name, caption_model_name):
+def load(clip_model_name):
     global ci
     if ci is None:
         print(f"Loading CLIP Interrogator {clip_interrogator.__version__}...")
+
         config = Config(
             device=devices.get_optimal_device(),
             cache_path = 'models/clip-interrogator',
-            clip_model_name=clip_model_name
+            clip_model_name=clip_model_name,
+            blip_model=shared.interrogator.load_blip_model().float()
         )
-        if caption_model_name:
-            config.caption_model_name = caption_model_name
         if low_vram:
             config.apply_low_vram_defaults()
         ci = Interrogator(config)
 
-    if caption_model_name and caption_model_name != ci.config.caption_model_name:
-        ci.config.caption_model_name = caption_model_name
-        ci.load_caption_model()
+    if clip_model_name != ci.config.clip_model_name:
         ci.config.clip_model_name = clip_model_name
         ci.load_clip_model()
@@ -74,14 +72,14 @@ def unload():
     global ci
     if ci is not None:
         print("Offloading CLIP Interrogator...")
-        ci.caption_model = ci.caption_model.to(devices.cpu)
+        ci.blip_model = ci.blip_model.to(devices.cpu)
         ci.clip_model = ci.clip_model.to(devices.cpu)
-        ci.caption_offloaded = True
+        ci.blip_offloaded = True
         ci.clip_offloaded = True
         devices.torch_gc()
 
 def image_analysis(image, clip_model_name):
-    load(clip_model_name, None)
+    load(clip_model_name)
 
     image = image.convert('RGB')
     image_features = ci.image_to_features(image)
@@ -115,7 +113,7 @@ def interrogate(image, mode, caption=None):
         raise Exception(f"Unknown mode {mode}")
     return prompt
 
-def image_to_prompt(image, mode, clip_model_name, caption_model_name):
+def image_to_prompt(image, mode, clip_model_name):
     shared.state.begin()
     shared.state.job = 'interrogate'
 
@@ -124,7 +122,7 @@
             lowvram.send_everything_to_cpu()
             devices.torch_gc()
 
-        load(clip_model_name, caption_model_name)
+        load(clip_model_name)
         image = image.convert('RGB')
         prompt = interrogate(image, mode)
     except torch.cuda.OutOfMemoryError as e:
@@ -146,11 +144,6 @@ def about_tab():
         "CLIP models:\n"
         "* For best prompts with Stable Diffusion 1.* choose the **ViT-L-14/openai** model.\n"
         "* For best prompts with Stable Diffusion 2.* choose the **ViT-H-14/laion2b_s32b_b79k** model.\n"
-        "\nCaption models:\n"
-        "* blip-large is recommended. use blip-base if you have less than 8GB of VRAM.\n"
-        "* blip-base: 990MB, blip-large: 1.9GB\n"
-        "* git-large-coco: 1.58GB\n"
-        "* blip2-2.7b: 15.5GB, blip2-flan-t5-xl: 15.77GB\n"
         "\nOther:\n"
         "* When you are done click the **Unload** button to free up memory."
     )
@@ -166,11 +159,14 @@ def about_tab():
             vram_info += "<br>Using low VRAM configuration"
         gr.Markdown(vram_info)
 
+def get_models():
+    return ['/'.join(x) for x in open_clip.list_pretrained()]
+
 def analyze_tab():
     with gr.Column():
         with gr.Row():
             image = gr.Image(type='pil', label="Image")
-            model = gr.Dropdown(list_clip_models(), value='ViT-L-14/openai', label='CLIP Model')
+            model = gr.Dropdown(get_models(), value='ViT-L-14/openai', label='CLIP Model')
         with gr.Row():
             medium = gr.Label(label="Medium", num_top_classes=5)
             artist = gr.Label(label="Artist", num_top_classes=5)
@@ -181,7 +177,7 @@
     button.click(image_analysis, inputs=[image, model], outputs=[medium, artist, movement, trending, flavor])
 
 def batch_tab():
-    def batch_process(folder, clip_model, caption_model, mode, output_mode):
+    def batch_process(folder, clip_model, mode, output_mode):
         if not os.path.exists(folder):
             return f"Folder {folder} does not exist"
         if not os.path.isdir(folder):
@@ -199,7 +195,7 @@ def batch_tab():
            lowvram.send_everything_to_cpu()
            devices.torch_gc()
 
        load(clip_model)
 
        shared.total_tqdm.updateTotal(len(files))
        ci.config.quiet = True
@@ -240,8 +236,7 @@ def batch_tab():
     with gr.Row():
         folder = gr.Text(label="Images folder", value="", interactive=True)
     with gr.Row():
-        clip_model = gr.Dropdown(list_clip_models(), value='ViT-L-14/openai', label='CLIP Model')
-        caption_model = gr.Dropdown(list_caption_models(), value='blip-base' if low_vram else 'blip-large', label='Caption Model')
+        clip_model = gr.Dropdown(get_models(), value='ViT-L-14/openai', label='CLIP Model')
         mode = gr.Radio(['caption', 'best', 'fast', 'classic', 'negative'], label='Prompt Mode', value='fast')
         output_mode = gr.Dropdown(BATCH_OUTPUT_MODES, value=BATCH_OUTPUT_MODES[0], label='Output Mode')
     with gr.Row():
@@ -249,7 +244,7 @@
         button = gr.Button("Go!", variant='primary')
         interrupt = gr.Button('Interrupt', visible=True)
         interrupt.click(fn=lambda: shared.state.interrupt(), inputs=[], outputs=[])
 
-    button.click(batch_process, inputs=[folder, clip_model, caption_model, mode, output_mode], outputs=[])
+    button.click(batch_process, inputs=[folder, clip_model, mode, output_mode], outputs=[])
 
 def prompt_tab():
     with gr.Column():
         image = gr.Image(type='pil', label="Image")
         with gr.Column():
             mode = gr.Radio(['best', 'fast', 'classic', 'negative'], label='Mode', value='best')
-            clip_model = gr.Dropdown(list_clip_models(), value='ViT-L-14/openai', label='CLIP Model')
-            caption_model = gr.Dropdown(list_caption_models(), value='blip-base' if low_vram else 'blip-large', label='Caption Model')
-            list_caption_models
+            clip_model = gr.Dropdown(get_models(), value='ViT-L-14/openai', label='CLIP Model')
             prompt = gr.Textbox(label="Prompt", lines=3)
         with gr.Row():
             button = gr.Button("Generate", variant='primary')
             unload_button = gr.Button("Unload")
 
-    button.click(image_to_prompt, inputs=[image, mode, clip_model, caption_model], outputs=prompt)
+    button.click(image_to_prompt, inputs=[image, mode, clip_model], outputs=prompt)
     unload_button.click(unload)
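
For reviewers who want to sanity-check the restored get_models() helper outside of the webui, here is a minimal standalone sketch. It assumes only that the open-clip-torch package (imported as open_clip, as in the extension) is installed; the __main__ block is illustrative and not part of the patch.

    import open_clip

    def get_models():
        # open_clip.list_pretrained() returns (architecture, checkpoint) pairs,
        # e.g. ("ViT-L-14", "openai"); join them into the "arch/checkpoint"
        # strings that the extension's CLIP Model dropdowns display.
        return ['/'.join(x) for x in open_clip.list_pretrained()]

    if __name__ == "__main__":
        models = get_models()
        print(f"{len(models)} CLIP checkpoints available")
        print('ViT-L-14/openai' in models)  # default value used by the dropdowns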