diff --git a/configs/caption_coco.yaml b/configs/caption_coco.yaml
index b398665..42eab70 100644
--- a/configs/caption_coco.yaml
+++ b/configs/caption_coco.yaml
@@ -3,7 +3,7 @@ ann_root: 'annotation'
 coco_gt_root: 'annotation/coco_gt'
 
 # set pretrained as a file path or an url
-pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth'
+pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
 
 # size of vit model; base or large
 vit: 'base'
diff --git a/configs/nocaps.yaml b/configs/nocaps.yaml
index 27bb115..9028135 100644
--- a/configs/nocaps.yaml
+++ b/configs/nocaps.yaml
@@ -2,7 +2,7 @@ image_root: '/export/share/datasets/vision/nocaps/'
 ann_root: 'annotation'
 
 # set pretrained as a file path or an url
-pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth'
+pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
 
 vit: 'base'
 batch_size: 32
diff --git a/configs/vqa.yaml b/configs/vqa.yaml
index 118f396..74327e6 100644
--- a/configs/vqa.yaml
+++ b/configs/vqa.yaml
@@ -4,7 +4,7 @@ train_files: ['vqa_train','vqa_val','vg_qa']
 ann_root: 'annotation'
 
 # set pretrained as a file path or an url
-pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_vqa.pth'
+pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'
 
 # size of vit model; base or large
 vit: 'base'
diff --git a/demo.ipynb b/demo.ipynb
index 62e6c3c..3077a1a 100644
--- a/demo.ipynb
+++ b/demo.ipynb
@@ -99,7 +99,7 @@
     "image_size = 384\n",
     "image = load_demo_image(image_size=image_size, device=device)\n",
     "\n",
-    "model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth'\n",
+    "model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth'\n",
     " \n",
     "model = blip_decoder(pretrained=model_url, image_size=image_size, vit='base')\n",
     "model.eval()\n",
@@ -153,7 +153,7 @@
     "image_size = 480\n",
     "image = load_demo_image(image_size=image_size, device=device) \n",
     "\n",
-    "model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_vqa.pth'\n",
+    "model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'\n",
     " \n",
     "model = blip_vqa(pretrained=model_url, image_size=image_size, vit='base')\n",
     "model.eval()\n",
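
For reference, a minimal sketch (not part of the diff) of how the renamed checkpoints are consumed, using the same constructor calls that already appear in demo.ipynb. The import paths models.blip and models.blip_vqa are assumed from the repository's usual layout; per the config comments, 'pretrained' accepts either a file path or a URL, so pointing the models at the new URLs is enough to fetch and load the weights.

# Minimal sketch, not part of the diff: sanity-check that the renamed checkpoint
# URLs resolve, using the same calls shown in demo.ipynb. The import paths below
# (models.blip, models.blip_vqa) are assumed from the usual BLIP repo layout.
from models.blip import blip_decoder      # assumed module path
from models.blip_vqa import blip_vqa      # assumed module path

caption_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth'
vqa_url     = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'

# 'pretrained' accepts a file path or a URL (see the config comments), so simply
# constructing the models downloads the renamed checkpoints and loads the weights.
caption_model = blip_decoder(pretrained=caption_url, image_size=384, vit='base').eval()
vqa_model     = blip_vqa(pretrained=vqa_url, image_size=480, vit='base').eval()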