Mirror of https://github.com/huchenlei/Depth-Anything.git
Synced 2026-01-26 15:29:46 +00:00

Commit: Support video depth visualization

README.md (41 changed lines)
@@ -19,7 +19,10 @@ This work presents Depth Anything, a highly practical solution for robust monocu
 
 ## News
 
-* **2024-01-22:** Paper, project page, code, models, and demo are released.
+* **2024-01-25:** Support [video depth visualization](./run_video.py). Also, both [online demo](https://huggingface.co/spaces/LiheYoung/Depth-Anything) and [local demo](./app.py) support video input.
+* **2024-01-23:** The new ControlNet based on Depth Anything is integrated into [ControlNet WebUI](https://github.com/Mikubill/sd-webui-controlnet) and [ComfyUI's ControlNet](https://github.com/Fannovel16/comfyui_controlnet_aux).
+* **2024-01-23:** Depth Anything [ONNX](https://github.com/fabio-sim/Depth-Anything-ONNX) and [TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt) versions are supported.
+* **2024-01-22:** Paper, project page, code, models, and demo ([HuggingFace](https://huggingface.co/spaces/LiheYoung/Depth-Anything), [OpenXLab](https://openxlab.org.cn/apps/detail/yyfan/depth_anything)) are released.
 
 
 ## Features of Depth Anything
@@ -35,7 +38,7 @@ This work presents Depth Anything, a highly practical solution for robust monocu
 
 - **Better depth-conditioned ControlNet**
 
-We re-train **a better depth-conditioned ControlNet** based on Depth Anything. It offers more precise synthesis than the previous MiDaS-based ControlNet. Please refer [here](./controlnet/) for details.
+We re-train **a better depth-conditioned ControlNet** based on Depth Anything. It offers more precise synthesis than the previous MiDaS-based ControlNet. Please refer [here](./controlnet/) for details. You can also use our new ControlNet based on Depth Anything in [ControlNet WebUI](https://github.com/Mikubill/sd-webui-controlnet).
 
 - **Downstream high-level scene understanding**
 
@@ -68,7 +71,7 @@ We provide three models of varying scales for robust relative depth estimation:
 | Depth-Anything-Base | 97.5M | 13 | 9 | 6 |
 | Depth-Anything-Large | 335.3M | 20 | 13 | 12 |
 
-Note that the V100 and A100 inference time (*without TensorRT*) is computed by excluding the pre-processing and post-processing stages, whereas the last column RTX4090 (*with TensorRT*) is computed by including these two stages. See [here]() for details.
+Note that the V100 and A100 inference time (*without TensorRT*) is computed by excluding the pre-processing and post-processing stages, whereas the last column RTX4090 (*with TensorRT*) is computed by including these two stages (please refer to [Depth-Anything-TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt)).
 
 You can easily load our pre-trained models by:
 ```python
@@ -115,9 +118,13 @@ For the ``img-path``, you can either 1) point it to an image directory storing a
 
 For example:
 ```bash
-python run.py --encoder vitl --img-path assets/examples --outdir depth_visualization
+python run.py --encoder vitl --img-path assets/examples --outdir depth_vis
 ```
 
+**If you want to use Depth Anything on videos:**
+```bash
+python run_video.py --encoder vitl --video-path assets/examples_video --outdir video_depth_vis
+```
+
 ### Gradio demo
 
@@ -144,7 +151,7 @@ import cv2
 import torch
 
 encoder = 'vits' # can also be 'vitb' or 'vitl'
-depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{:}14'.format(encoder))
+depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{:}14'.format(encoder)).eval()
 
 transform = Compose([
 Resize(
@@ -169,6 +176,23 @@ depth = depth_anything(image)
 ```
 </details>
 
+### Do not want to manually define image pre-processing and download our model definition files?
+
+Easily use Depth Anything through ``transformers``! Please refer to [these instructions](https://huggingface.co/LiheYoung/depth-anything-small-hf) (credit to [@niels](https://huggingface.co/nielsr)).
+
+<details>
+<summary>Click here for a brief demo:</summary>
+
+```python
+from transformers import pipeline
+from PIL import Image
+
+image = Image.open('Your-image-path')
+pipe = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")
+depth = pipe(image)["depth"]
+```
+</details>
+
 ## Community Support
 
 **We sincerely appreciate all the extentions built on our Depth Anything from the community. Thank you a lot!**
@@ -177,10 +201,17 @@ Here we list the extensions we have found:
 - Depth Anything ONNX: https://github.com/fabio-sim/Depth-Anything-ONNX
 - Depth Anything TensorRT: https://github.com/spacewalk01/depth-anything-tensorrt
+- Depth Anything in ControlNet WebUI: https://github.com/Mikubill/sd-webui-controlnet
+- Depth Anything in ComfyUI's ControlNet: https://github.com/Fannovel16/comfyui_controlnet_aux
+- Depth Anything in X-AnyLabeling: https://github.com/CVHub520/X-AnyLabeling
+- Depth Anything in OpenXLab: https://openxlab.org.cn/apps/detail/yyfan/depth_anything
 
 If you have your amazing projects supporting or improving (*e.g.*, speed) Depth Anything, please feel free to drop an issue. We will add them here.
 
 
 ## Acknowledgement
 
 We would like to express our deepest gratitude to [AK(@_akhaliq)](https://twitter.com/_akhaliq) and the awesome HuggingFace team ([@niels](https://huggingface.co/nielsr), [@hysts](https://huggingface.co/hysts), and [@yuvraj](https://huggingface.co/ysharma)) for helping improve the online demo and build the HF models.
 
 ## Citation
 
 If you find this project useful, please consider citing:
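
The README now points readers to run_video.py for the new video mode, but that script is not among the files shown in this commit view. For orientation, here is a minimal sketch of per-frame video depth inference built on the same DepthAnything API that run.py uses; the import path, output file name, and the simplified pre-processing (plain resize plus ImageNet normalization instead of the repo's full transform pipeline) are assumptions, not the committed code.

```python
# Hypothetical sketch of video depth visualization; not the committed run_video.py.
import cv2
import numpy as np
import torch
import torch.nn.functional as F

from depth_anything.dpt import DepthAnything  # assumed import path, mirroring run.py

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()

cap = cv2.VideoCapture('assets/examples_video/davis_dolphins.mp4')
fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('video_depth_vis.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

mean = np.array([0.485, 0.456, 0.406])  # ImageNet statistics, as in the repo's NormalizeImage
std = np.array([0.229, 0.224, 0.225])

with torch.no_grad():
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) / 255.0
        image = (cv2.resize(image, (518, 518)) - mean) / std      # 518 is a multiple of the ViT patch size 14
        image = torch.from_numpy(image.transpose(2, 0, 1)).unsqueeze(0).float().to(DEVICE)

        depth = model(image)                                      # relative inverse depth
        depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
        depth = depth.cpu().numpy().astype(np.uint8)
        out.write(cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)) # same colormap as run.py

cap.release()
out.release()
```
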
app.py (5 changed lines)
@@ -28,6 +28,7 @@ model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVI
 
 title = "# Depth Anything"
 description = """Official demo for **Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data**.
+
 Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""
 
 transform = Compose([
@@ -56,7 +57,7 @@ with gr.Blocks(css=css) as demo:
 
 with gr.Row():
 input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
-depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0)
+depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5)
 raw_file = gr.File(label="16-bit raw depth (can be considered as disparity)")
 submit = gr.Button("Submit")
 
@@ -88,7 +89,7 @@ with gr.Blocks(css=css) as demo:
 example_files.sort()
 example_files = [os.path.join('assets/examples', filename) for filename in example_files]
 examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_image_slider, raw_file], fn=on_submit, cache_examples=False)
 
 
 if __name__ == '__main__':
 demo.queue().launch()
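
For context on the `position=0` to `position=0.5` change: in the gradio_imageslider component, `position` appears to set where the before/after comparison handle starts as a fraction of the image width, so 0.5 starts it centered rather than pushed to one side. A minimal sketch with a placeholder function (not the demo's actual `on_submit`):

```python
import gradio as gr
from gradio_imageslider import ImageSlider   # the third-party component app.py appears to use

def fake_depth(image):
    # placeholder for on_submit: the real demo returns (input image, colorized depth map)
    return (image, image)

with gr.Blocks() as demo:
    inp = gr.Image(label="Input Image", type='numpy')
    out = ImageSlider(label="Depth Map with Slider View", position=0.5)  # handle starts centered
    gr.Button("Submit").click(fn=fake_depth, inputs=[inp], outputs=[out])

demo.queue().launch()
```
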
assets/examples_video/davis_dolphins.mp4 (new binary file, not shown)
assets/examples_video/davis_rollercoaster.mp4 (new binary file, not shown)
assets/examples_video/davis_seasnake.mp4 (new binary file, not shown)
run.py (14 changed lines)
@@ -28,13 +28,11 @@ if __name__ == '__main__':
 
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{}14'.format(args.encoder)).to(DEVICE)
+depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{}14'.format(args.encoder)).to(DEVICE).eval()
 
 total_params = sum(param.numel() for param in depth_anything.parameters())
 print('Total parameters: {:.2f}M'.format(total_params / 1e6))
 
-depth_anything.eval()
-
 transform = Compose([
 Resize(
 width=518,
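
Folding `.eval()` into the `from_pretrained(...).to(DEVICE)` chain (mirrored in the README snippet above) replaces the separate `depth_anything.eval()` call and switches layers such as dropout and batch norm to inference behavior; pairing it with `torch.no_grad()` is the usual inference pattern. A generic PyTorch illustration, not code from this repo:

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.Dropout(p=0.5))
x = torch.ones(1, 8)

model.train()                                  # dropout active: repeated calls usually differ
print(torch.equal(model(x), model(x)))         # almost always False

model.eval()                                   # dropout disabled: outputs are deterministic
with torch.no_grad():                          # also skip autograd bookkeeping during inference
    print(torch.equal(model(x), model(x)))     # True
```
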
@@ -57,9 +55,11 @@ if __name__ == '__main__':
 filenames = [args.img_path]
 else:
 filenames = os.listdir(args.img_path)
-filenames = [os.path.join(args.img_path, filename) for filename in filenames]
+filenames = [os.path.join(args.img_path, filename) for filename in filenames if not filename.startswith('.')]
 filenames.sort()
 
+os.makedirs(args.outdir, exist_ok=True)
+
 for filename in tqdm(filenames):
 raw_image = cv2.imread(filename)
 image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0
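
The new `if not filename.startswith('.')` filter drops hidden entries (e.g. `.DS_Store` on macOS) that `os.listdir` would otherwise hand to `cv2.imread`; since `imread` returns `None` for non-images, the subsequent `cvtColor` call would crash on them. A small illustration (the directory listing is made up):

```python
import cv2

listing = ['.DS_Store', 'demo1.png', 'demo2.jpg']        # hypothetical os.listdir() result
filenames = [f for f in listing if not f.startswith('.')]
print(filenames)                                          # ['demo1.png', 'demo2.jpg']

bad = cv2.imread('.DS_Store')                             # not an image, so imread returns None
print(bad is None)                                        # True -> cv2.cvtColor(bad, ...) would raise
```
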
@@ -78,9 +78,6 @@ if __name__ == '__main__':
 depth = depth.cpu().numpy().astype(np.uint8)
 depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
 
-os.makedirs(args.outdir, exist_ok=True)
-filename = os.path.basename(filename)
-
 split_region = np.ones((raw_image.shape[0], margin_width, 3), dtype=np.uint8) * 255
 combined_results = cv2.hconcat([raw_image, split_region, depth_color])
 
@@ -99,4 +96,5 @@ if __name__ == '__main__':
 
 final_result = cv2.vconcat([caption_space, combined_results])
 
-cv2.imwrite(os.path.join(args.outdir, filename[:filename.find('.')] + '_img_depth.png'), final_result)
+filename = os.path.basename(filename)
+cv2.imwrite(os.path.join(args.outdir, filename[:filename.rfind('.')] + '_img_depth.png'), final_result)
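
Besides re-applying `os.path.basename` right before the write (it was removed from earlier in the loop in the hunk above), the slice now uses `rfind('.')` instead of `find('.')`, so only the final extension is stripped even when the file name itself contains dots. For example (file name is illustrative):

```python
name = 'photo.2024.01.25.png'
print(name[:name.find('.')] + '_img_depth.png')    # photo_img_depth.png             (cut at the first dot)
print(name[:name.rfind('.')] + '_img_depth.png')   # photo.2024.01.25_img_depth.png  (extension only)
```
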