From cc507c488cd4902d95bc7d2bd248c95e83e39588 Mon Sep 17 00:00:00 2001
From: Lihe Yang
Date: Mon, 22 Jan 2024 14:20:44 +0800
Subject: [PATCH] Add local demo

---
 README.md        | 30 +++++++++------
 app.py           | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  2 +
 3 files changed, 117 insertions(+), 11 deletions(-)
 create mode 100644 app.py

diff --git a/README.md b/README.md
index 8f46d61..6297b28 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,22 @@

Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data

-[**Lihe Yang**](https://liheyoung.github.io/)1 · [**Bingyi Kang**](https://scholar.google.com/citations?user=NmHgX-wAAAAJ)2+ · [**Zilong Huang**](http://speedinghzl.github.io/)2 · [**Xiaogang Xu**](https://xiaogang00.github.io/)3,4 · [**Jiashi Feng**](https://sites.google.com/site/jshfeng/)2 · [**Hengshuang Zhao**](https://hszhao.github.io/)1+
+[**Lihe Yang**](https://liheyoung.github.io/)1 · [**Bingyi Kang**](https://scholar.google.com/citations?user=NmHgX-wAAAAJ)2+ · [**Zilong Huang**](http://speedinghzl.github.io/)2 · [**Xiaogang Xu**](https://xiaogang00.github.io/)3,4 · [**Jiashi Feng**](https://sites.google.com/site/jshfeng/)2 · [**Hengshuang Zhao**](https://hszhao.github.io/)1+

1The University of Hong Kong · 2TikTok · 3Zhejiang Lab · 4Zhejiang University

+corresponding authors

-Paper PDF
+Paper PDF Project Page
-
+
This work presents Depth Anything, a highly practical solution for robust monocular depth estimation by training on a combination of 1.5M labeled images and **62M+ unlabeled images**.

![teaser](assets/teaser.png)

-[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/depth-anything-unleashing-the-power-of-large/monocular-depth-estimation-on-nyu-depth-v2)](https://paperswithcode.com/sota/monocular-depth-estimation-on-nyu-depth-v2?p=depth-anything-unleashing-the-power-of-large)
-[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/depth-anything-unleashing-the-power-of-large/monocular-depth-estimation-on-kitti-eigen)](https://paperswithcode.com/sota/monocular-depth-estimation-on-kitti-eigen?p=depth-anything-unleashing-the-power-of-large)
-[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/depth-anything-unleashing-the-power-of-large/semantic-segmentation-on-cityscapes-val)](https://paperswithcode.com/sota/semantic-segmentation-on-cityscapes-val?p=depth-anything-unleashing-the-power-of-large)
-
## News

* **2024-01-22:** Paper, project page, code, models, and demo are released.
@@ -50,7 +46,7 @@ This work presents Depth Anything, a highly practical solution for robust monocu
Here we compare our Depth Anything with the previously best MiDaS v3.1 BEiTL-512 model.

-Please note that the latest MiDaS is also trained on KITTI and NYUv2, while we are not.
+Please note that the latest MiDaS is also trained on KITTI and NYUv2, while ours is not.

| Method | Params | KITTI || NYUv2 || Sintel || DDAD || ETH3D || DIODE ||
|-|-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
@@ -98,12 +94,23 @@ For example:
python run.py --encoder vitl --load-from checkpoints/depth_anything_vitl14.pth --img-path demo_images --outdir depth_visualization --localhub
```
+
+### Gradio demo
+
+To use our Gradio demo locally:
+
+```bash
+python app.py
+```
+
+You can also try our [online demo](https://huggingface.co/spaces/LiheYoung/Depth-Anything).
+
### Import Depth Anything to your project

If you want to use Depth Anything in your own project, you can simply follow [``run.py``](run.py) to load our models and define data pre-processing.
-Code snippet (note the difference between our data pre-processing and that of MiDaS)
+Code snippet (note the difference between our data pre-processing and that of MiDaS).

```python
from depth_anything.dpt import DPT_DINOv2
@@ -138,6 +145,7 @@ depth = depth_anything(image)
```
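For reference, the pre-processing mentioned above can also be exercised on its own. The following is a minimal sketch that builds the same transform the new `app.py` constructs; the input path `demo_images/example.jpg` is only a placeholder:

```python
import cv2
import torch
from torchvision.transforms import Compose

from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

# Scale pixels to [0, 1], resize so the shorter side becomes 518 while keeping the
# aspect ratio, round both sides to multiples of 14 (the DINOv2 patch size), then
# apply ImageNet normalization -- the same Compose that app.py defines.
transform = Compose([
    Resize(
        width=518,
        height=518,
        resize_target=False,
        keep_aspect_ratio=True,
        ensure_multiple_of=14,
        resize_method='lower_bound',
        image_interpolation_method=cv2.INTER_CUBIC,
    ),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),
])

image = cv2.imread('demo_images/example.jpg')             # placeholder path
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0    # HWC float array in [0, 1]
image = transform({'image': image})['image']              # CHW numpy array, sides divisible by 14
image = torch.from_numpy(image).unsqueeze(0)              # add batch dimension -> (1, 3, H', W')
```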
+
## Citation

If you find this project useful, please consider citing:
@@ -146,7 +154,7 @@ If you find this project useful, please consider citing:
@article{depthanything,
  title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
  author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
-  journal={arXiv:2401.10891},
-  year={2024},
+  journal={arXiv:2401.10891},
+  year={2024}
}
```
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..11c708f
--- /dev/null
+++ b/app.py
@@ -0,0 +1,96 @@
+import gradio as gr
+import cv2
+import numpy as np
+import os
+from PIL import Image
+import torch
+import torch.nn.functional as F
+from torchvision.transforms import Compose
+import tempfile
+from gradio_imageslider import ImageSlider
+
+from depth_anything.dpt import DPT_DINOv2
+from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
+
+css = """
+#img-display-container {
+    max-height: 100vh;
+    }
+#img-display-input {
+    max-height: 80vh;
+    }
+#img-display-output {
+    max-height: 80vh;
+    }
+"""
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+model = DPT_DINOv2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024]).to(DEVICE).eval()
+model.load_state_dict(torch.load('checkpoints/depth_anything_vitl14.pth'))
+
+title = "# Depth Anything"
+description = """Official demo for **Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data**.
+Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""
+
+transform = Compose([
+    Resize(
+        width=518,
+        height=518,
+        resize_target=False,
+        keep_aspect_ratio=True,
+        ensure_multiple_of=14,
+        resize_method='lower_bound',
+        image_interpolation_method=cv2.INTER_CUBIC,
+    ),
+    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    PrepareForNet(),
+])
+
+@torch.no_grad()
+def predict_depth(model, image):
+    return model(image)
+
+
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(title)
+    gr.Markdown(description)
+    gr.Markdown("### Depth Prediction demo")
+    gr.Markdown("You can slide the output to compare the depth prediction with input image")
+
+    with gr.Row():
+        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
+        depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0)
+    raw_file = gr.File(label="16-bit raw depth (can be considered as disparity)")
+    submit = gr.Button("Submit")
+
+    def on_submit(image):
+        original_image = image.copy()
+
+        h, w = image.shape[:2]
+
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
+        image = transform({'image': image})['image']
+        image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)
+
+        depth = predict_depth(model, image)
+        depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+
+        raw_depth = Image.fromarray(depth.cpu().numpy().astype('uint16'))
+        tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+        raw_depth.save(tmp.name)
+
+        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+        depth = depth.cpu().numpy().astype(np.uint8)
+        colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]
+
+        return [(original_image, colored_depth), tmp.name]
+
+    submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, raw_file])
+
+    example_files = os.listdir('assets/examples')
+    example_files.sort()
+    example_files = [os.path.join('assets/examples', filename) for filename in example_files]
+    examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_image_slider, raw_file], fn=on_submit, cache_examples=False)
+
+
+if __name__ == '__main__':
+    demo.queue().launch()
diff --git a/requirements.txt b/requirements.txt
index 4f8641a..0d750ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+gradio_imageslider
+gradio==4.14.0
torch
torchvision
opencv-python
\ No newline at end of file
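Outside the Gradio UI, the same inference path can be driven from a plain script. The sketch below roughly mirrors `on_submit()` in `app.py` without the interface plumbing; `model` and `transform` are assumed to be built exactly as at the top of `app.py`, and the input path and output file names are placeholders:

```python
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'  # same device selection as app.py

def save_depth(image_path, model, transform, out_prefix='depth'):
    """Run one image through the model and save raw 16-bit and colored depth maps."""
    raw = cv2.imread(image_path)                          # placeholder input path
    h, w = raw.shape[:2]

    image = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB) / 255.0
    image = transform({'image': image})['image']
    image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        depth = model(image)
    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

    # 16-bit raw prediction (relative inverse depth / disparity), as served by the demo.
    Image.fromarray(depth.cpu().numpy().astype('uint16')).save(f'{out_prefix}_raw.png')

    # 8-bit colored visualization using the same INFERNO colormap as the demo.
    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
    colored = cv2.applyColorMap(depth.cpu().numpy().astype(np.uint8), cv2.COLORMAP_INFERNO)
    cv2.imwrite(f'{out_prefix}_colored.png', colored)
```

As in the demo, the 16-bit PNG stores the raw relative prediction, while the 8-bit colormap is only a visualization.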