From b4ae44927b78d0093b556e3ce43cbdcff422017a Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: Thu, 21 Apr 2022 16:45:46 -0700 Subject: [PATCH] ViT-L/14@336px (#234) --- clip/clip.py | 1 + model-card.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clip/clip.py b/clip/clip.py index cf2ba38..9c045ac 100644 --- a/clip/clip.py +++ b/clip/clip.py @@ -36,6 +36,7 @@ _MODELS = { "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt", + "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt", } diff --git a/model-card.md b/model-card.md index 126d845..6db1ca4 100644 --- a/model-card.md +++ b/model-card.md @@ -18,7 +18,7 @@ The base model uses a ResNet50 with several modifications as an image encoder an Initially, we’ve released one CLIP model based on the Vision Transformer architecture equivalent to ViT-B/32, along with the RN50 model, using the architecture equivalent to ResNet-50. -As part of the staged release process, we have also released the RN101 model, as well as RN50x4, a RN50 scaled up 4x according to the [EfficientNet](https://arxiv.org/abs/1905.11946) scaling rule. In July 2021, we additionally released the RN50x16 and ViT-B/16 models, and In January 2022, the RN50x64 and ViT-L/14 models were released. +As part of the staged release process, we have also released the RN101 model, as well as RN50x4, a RN50 scaled up 4x according to the [EfficientNet](https://arxiv.org/abs/1905.11946) scaling rule. In July 2021, we additionally released the RN50x16 and ViT-B/16 models, and in January 2022, the RN50x64 and ViT-L/14 models were released. Lastly, the ViT-L/14@336px model was released in April 2022. Please see the paper linked below for further details about their specification.