Update depth_to_pointcloud.py

2026-03-13 05:19:48 +00:00 · 2024-02-01 01:55:06 +11:00
parent 632eb64cc5
commit be3dc4c528
1 changed files with 75 additions and 0 deletions
--- a/metric_depth/depth_to_pointcloud.py
+++ b/metric_depth/depth_to_pointcloud.py
@@ -1,4 +1,79 @@
 # Born out of Issue 36. 
 # Allows the user to set up their own test files to infer on (create a folder my_test with subfolders input and output in the metric_depth directory before running this script).
 # Make sure you have the necessary libraries
+# Code by @1ssb

+import argparse
+import os
+import glob
+import torch
+import numpy as np
+from PIL import Image
+import torchvision.transforms as transforms
+import open3d as o3d
+from tqdm import tqdm
+from zoedepth.models.builder import build_model
+from zoedepth.utils.config import get_config
+
+# Global settings
+FL = 715.0873  # focal length applied to both axes when NYU_DATA is True
+FY = 256 * 0.6  # vertical focal length used when NYU_DATA is False
+FX = 256 * 0.6  # horizontal focal length used when NYU_DATA is False
+NYU_DATA = False  # selects FL (True) or FX/FY (False) for the back-projection
+FINAL_HEIGHT = 256  # height the colour image and depth map are resized to
+FINAL_WIDTH = 256  # width the colour image and depth map are resized to
+INPUT_DIR = './my_test/input'  # folder scanned for *.png and *.jpg inputs
+OUTPUT_DIR = './my_test/output'  # folder receiving one .ply per input image
+DATASET = 'nyu' # Let's not pick a fight with the model's dataloader
+
+def process_images(model):
+    if not os.path.exists(OUTPUT_DIR):
+        os.makedirs(OUTPUT_DIR)
+
+    image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
+    for image_path in tqdm(image_paths, desc="Processing Images"):
+        try:
+            color_image = Image.open(image_path).convert('RGB')
+            original_width, original_height = color_image.size
+            image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
+
+            pred = model(image_tensor, dataset=DATASET)
+            if isinstance(pred, dict):
+                pred = pred.get('metric_depth', pred.get('out'))
+            elif isinstance(pred, (list, tuple)):
+                pred = pred[-1]
+            pred = pred.squeeze().detach().cpu().numpy()
+
+            # Resize color image and depth to final size
+            resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
+            resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)
+
+            focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL)
+            x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
+            x = (x - FINAL_WIDTH / 2) / focal_length_x
+            y = (y - FINAL_HEIGHT / 2) / focal_length_y
+            z = np.array(resized_pred)
+            points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
+            colors = np.array(resized_color_image).reshape(-1, 3) / 255.0
+
+            pcd = o3d.geometry.PointCloud()
+            pcd.points = o3d.utility.Vector3dVector(points)
+            pcd.colors = o3d.utility.Vector3dVector(colors)
+            o3d.io.write_point_cloud(os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"), pcd)
+        except Exception as e:
+            print(f"Error processing {image_path}: {e}")
+
+def main(model_name, pretrained_resource):
+    config = get_config(model_name, "eval", DATASET)
+    config.pretrained_resource = pretrained_resource
+    model = build_model(config).to('cuda' if torch.cuda.is_available() else 'cpu')
+    model.eval()
+    process_images(model)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m", "--model", type=str, default='zoedepth', help="Name of the model to test")
+    parser.add_argument("-p", "--pretrained_resource", type=str, default='local::./checkpoints/depth_anything_metric_depth_indoor.pt', help="Pretrained resource to use for fetching weights.")
+
+    args = parser.parse_args()
+    main(args.model, args.pretrained_resource)