mirror of
https://github.com/salesforce/BLIP.git
synced 2026-01-26 15:19:44 +00:00
update demo
This commit is contained in:
14
demo.ipynb
14
demo.ipynb
@@ -1,5 +1,13 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bb2e6f60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BLIP: Inference Demo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -53,7 +61,8 @@
|
||||
"id": "f72f4406",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Image Captioning"
|
||||
"# Image Captioning\n",
|
||||
"Perform image captioning using finetuned BLIP model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -98,7 +107,8 @@
|
||||
"id": "fac320a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# VQA"
|
||||
"# VQA\n",
|
||||
"Perform visual question answering using finetuned BLIP model"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -20,7 +20,7 @@ from timm.models.hub import download_cached_file
|
||||
|
||||
class BLIP_Base(nn.Module):
|
||||
def __init__(self,
|
||||
med_config = './configs/med_config.json',
|
||||
med_config = 'configs/med_config.json',
|
||||
image_size = 384,
|
||||
vit = 'base',
|
||||
vit_grad_ckpt = False,
|
||||
@@ -75,7 +75,7 @@ class BLIP_Base(nn.Module):
|
||||
|
||||
class BLIP_Decoder(nn.Module):
|
||||
def __init__(self,
|
||||
med_config = './configs/med_config.json',
|
||||
med_config = 'configs/med_config.json',
|
||||
image_size = 384,
|
||||
vit = 'base',
|
||||
vit_grad_ckpt = False,
|
||||
|
||||
@@ -13,7 +13,7 @@ import numpy as np
|
||||
|
||||
class BLIP_NLVR(nn.Module):
|
||||
def __init__(self,
|
||||
med_config = './configs/med_config.json',
|
||||
med_config = 'configs/med_config.json',
|
||||
image_size = 480,
|
||||
vit = 'base',
|
||||
vit_grad_ckpt = False,
|
||||
|
||||
@@ -18,7 +18,7 @@ from models.blip import create_vit, init_tokenizer, load_checkpoint
|
||||
|
||||
class BLIP_Pretrain(nn.Module):
|
||||
def __init__(self,
|
||||
med_config = './configs/bert_config.json',
|
||||
med_config = 'configs/bert_config.json',
|
||||
image_size = 224,
|
||||
vit = 'base',
|
||||
vit_grad_ckpt = False,
|
||||
|
||||
@@ -9,7 +9,7 @@ from models.blip import create_vit, init_tokenizer, load_checkpoint
|
||||
|
||||
class BLIP_Retrieval(nn.Module):
|
||||
def __init__(self,
|
||||
med_config = './configs/med_config.json',
|
||||
med_config = 'configs/med_config.json',
|
||||
image_size = 384,
|
||||
vit = 'base',
|
||||
vit_grad_ckpt = False,
|
||||
|
||||
@@ -9,7 +9,7 @@ import numpy as np
|
||||
|
||||
class BLIP_VQA(nn.Module):
|
||||
def __init__(self,
|
||||
med_config = './configs/med_config.json',
|
||||
med_config = 'configs/med_config.json',
|
||||
image_size = 480,
|
||||
vit = 'base',
|
||||
vit_grad_ckpt = False,
|
||||
|
||||
Reference in New Issue
Block a user