diff --git a/demo.ipynb b/demo.ipynb index baaf34c..b86316a 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -1,5 +1,13 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "bb2e6f60", + "metadata": {}, + "source": [ + "# BLIP: Inference Demo" + ] + }, { "cell_type": "code", "execution_count": null, @@ -53,7 +61,8 @@ "id": "f72f4406", "metadata": {}, "source": [ - "# Image Captioning" + "# Image Captioning\n", + "Perform image captioning using finetuned BLIP model" ] }, { @@ -98,7 +107,8 @@ "id": "fac320a2", "metadata": {}, "source": [ - "# VQA" + "# VQA\n", + "Perform visual question answering using finetuned BLIP model" ] }, { diff --git a/models/blip.py b/models/blip.py index 5c2887e..af28d59 100644 --- a/models/blip.py +++ b/models/blip.py @@ -20,7 +20,7 @@ from timm.models.hub import download_cached_file class BLIP_Base(nn.Module): def __init__(self, - med_config = './configs/med_config.json', + med_config = 'configs/med_config.json', image_size = 384, vit = 'base', vit_grad_ckpt = False, @@ -75,7 +75,7 @@ class BLIP_Base(nn.Module): class BLIP_Decoder(nn.Module): def __init__(self, - med_config = './configs/med_config.json', + med_config = 'configs/med_config.json', image_size = 384, vit = 'base', vit_grad_ckpt = False, diff --git a/models/blip_nlvr.py b/models/blip_nlvr.py index 8824cba..8483716 100644 --- a/models/blip_nlvr.py +++ b/models/blip_nlvr.py @@ -13,7 +13,7 @@ import numpy as np class BLIP_NLVR(nn.Module): def __init__(self, - med_config = './configs/med_config.json', + med_config = 'configs/med_config.json', image_size = 480, vit = 'base', vit_grad_ckpt = False, diff --git a/models/blip_pretrain.py b/models/blip_pretrain.py index 9d0db2e..0684202 100644 --- a/models/blip_pretrain.py +++ b/models/blip_pretrain.py @@ -18,7 +18,7 @@ from models.blip import create_vit, init_tokenizer, load_checkpoint class BLIP_Pretrain(nn.Module): def __init__(self, - med_config = './configs/bert_config.json', + med_config = 'configs/bert_config.json', image_size = 224, vit = 'base', vit_grad_ckpt = False, diff --git a/models/blip_retrieval.py b/models/blip_retrieval.py index 2294db6..bc645f5 100644 --- a/models/blip_retrieval.py +++ b/models/blip_retrieval.py @@ -9,7 +9,7 @@ from models.blip import create_vit, init_tokenizer, load_checkpoint class BLIP_Retrieval(nn.Module): def __init__(self, - med_config = './configs/med_config.json', + med_config = 'configs/med_config.json', image_size = 384, vit = 'base', vit_grad_ckpt = False, diff --git a/models/blip_vqa.py b/models/blip_vqa.py index 9f284b4..d4cb368 100644 --- a/models/blip_vqa.py +++ b/models/blip_vqa.py @@ -9,7 +9,7 @@ import numpy as np class BLIP_VQA(nn.Module): def __init__(self, - med_config = './configs/med_config.json', + med_config = 'configs/med_config.json', image_size = 480, vit = 'base', vit_grad_ckpt = False,