mirror of
https://github.com/huchenlei/Depth-Anything.git
synced 2026-04-30 20:31:13 +00:00
Initial commit
This commit is contained in:
481
metric_depth/zoedepth/data/transforms.py
Normal file
481
metric_depth/zoedepth/data/transforms.py
Normal file
@@ -0,0 +1,481 @@
|
||||
# MIT License
|
||||
|
||||
# Copyright (c) 2022 Intelligent Systems Lab Org
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# File author: Shariq Farooq Bhat
|
||||
|
||||
import math
|
||||
import random
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
class RandomFliplr(object):
    """Mirror the sample left-to-right with a given probability.
    """

    def __init__(self, probability=0.5):
        """Init.

        Args:
            probability (float, optional): Chance that a call flips the
                sample. Defaults to 0.5.
        """
        self.__probability = probability

    def __call__(self, sample):
        # One draw per sample, so every entry is flipped (or kept) together.
        flip = random.random() < self.__probability
        if not flip:
            return sample

        for key in sample:
            entry = sample[key]
            # Entries with fewer than two axes have no horizontal direction.
            if len(entry.shape) < 2:
                continue
            # np.fliplr returns a view; copy to get an independent array.
            sample[key] = np.fliplr(entry).copy()

        return sample
|
||||
|
||||
|
||||
def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    The sample is modified in place: "image", "disparity" and "mask" are
    replaced by their resized versions when upscaling is required.

    Args:
        sample (dict): sample holding "image", "disparity" and "mask" arrays
        size (tuple): minimum size, compared against the first two axes of
            sample["disparity"] (i.e. rows first, then columns)
        image_interpolation_method (int, optional): cv2 interpolation flag
            used for the image. Disparity and mask always use
            cv2.INTER_NEAREST. Defaults to cv2.INTER_AREA.

    Returns:
        tuple: shape of the disparity map after the call (unchanged when the
            sample was already large enough)
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        # Already large enough. Return the size (not the sample) so that both
        # code paths honour the documented return type.
        return tuple(shape)

    # Use the larger of the two per-axis factors so both axes reach `size`.
    scale = max(size[0] / shape[0], size[1] / shape[1])

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # cv2.resize takes the target size in reversed axis order.
    dsize = tuple(shape[::-1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], dsize, interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], dsize, interpolation=cv2.INTER_NEAREST
    )

    # The mask goes through float32 for cv2.resize and back to bool afterwards.
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        dsize,
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)
|
||||
|
||||
|
||||
class RandomCrop(object):
    """Take a random crop of the given size (width, height) from the sample.
    """

    def __init__(
        self,
        width,
        height,
        resize_if_needed=False,
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): output width
            height (int): output height
            resize_if_needed (bool, optional): If True, a sample that is too
                small is upsampled first so that a crop of size
                (width, height) is possible. Defaults to False.
            image_interpolation_method (int, optional): cv2 interpolation flag
                forwarded to apply_min_size. Defaults to cv2.INTER_AREA.
        """
        # Stored rows-first so it can be compared directly with array shapes.
        self.__size = (height, width)
        self.__resize_if_needed = resize_if_needed
        self.__image_interpolation_method = image_interpolation_method

    def __call__(self, sample):
        crop_h, crop_w = self.__size
        shape = sample["disparity"].shape

        too_small = crop_h > shape[0] or crop_w > shape[1]
        if too_small and not self.__resize_if_needed:
            raise Exception(
                "Output size {} bigger than input size {}.".format(
                    self.__size, shape
                )
            )
        if too_small:
            shape = apply_min_size(
                sample, self.__size, self.__image_interpolation_method
            )

        # Draw the row offset first, then the column offset.
        top = np.random.randint(shape[0] - crop_h + 1)
        left = np.random.randint(shape[1] - crop_w + 1)
        rows = slice(top, top + crop_h)
        cols = slice(left, left + crop_w)

        for key, value in sample.items():
            # "code" and "basis" entries are never cropped.
            if key == "code" or key == "basis":
                continue

            if len(value.shape) >= 2:
                sample[key] = value[rows, cols]

        return sample
|
||||
|
||||
|
||||
class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
        letter_box=False,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (int, optional):
                cv2 interpolation flag used for the image. Targets and mask
                always use cv2.INTER_NEAREST. Defaults to cv2.INTER_AREA.
            letter_box (bool, optional):
                True: Zero-pad every resized array, centred, up to the
                requested (width, height).
                Defaults to False.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method
        self.__letter_box = letter_box

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap x to a multiple of `ensure_multiple_of`.

        Rounds to the nearest multiple first; falls back to rounding down if
        that exceeds max_val, then to rounding up if the result is below
        min_val.
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Compute the output (width, height) for an input of the given size.

        Raises:
            ValueError: if resize_method is not one of "lower_bound",
                "upper_bound" or "minimal".
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def make_letter_box(self, sample):
        """Zero-pad an array, centred, up to the requested (height, width)."""
        pad_rows = self.__height - sample.shape[0]
        pad_cols = self.__width - sample.shape[1]

        # Give the leftover pixel of an odd padding to the bottom/right side.
        # Using the floor-halved value on both sides would leave the output
        # one pixel short of the requested size.
        top = pad_rows // 2
        bottom = pad_rows - top
        left = pad_cols // 2
        right = pad_cols - left

        sample = cv2.copyMakeBorder(
            sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
        return sample

    def __call__(self, sample):
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__letter_box:
            sample["image"] = self.make_letter_box(sample["image"])

        if self.__resize_target:
            # Targets use nearest-neighbour interpolation.
            for key in ("disparity", "depth"):
                if key not in sample:
                    continue

                sample[key] = cv2.resize(
                    sample[key],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample[key] = self.make_letter_box(sample[key])

            # The mask is optional, exactly like the targets above.
            if "mask" in sample:
                mask = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    # Padded pixels are 0, i.e. False after the cast below.
                    mask = self.make_letter_box(mask)

                sample["mask"] = mask.astype(bool)

        return sample
|
||||
|
||||
|
||||
class ResizeFixed(object):
    """Resize image, disparity and mask of the sample to one fixed size.
    """

    def __init__(self, size):
        """Init.

        Args:
            size: target size; it is reversed before being handed to
                cv2.resize, so presumably (height, width) - TODO confirm.
        """
        self.__size = size

    def __call__(self, sample):
        dsize = self.__size[::-1]

        sample["image"] = cv2.resize(
            sample["image"], dsize, interpolation=cv2.INTER_LINEAR
        )
        sample["disparity"] = cv2.resize(
            sample["disparity"], dsize, interpolation=cv2.INTER_NEAREST
        )

        # The mask goes through float32 for cv2.resize and back to bool.
        resized_mask = cv2.resize(
            sample["mask"].astype(np.float32),
            dsize,
            interpolation=cv2.INTER_NEAREST,
        )
        sample["mask"] = resized_mask.astype(bool)

        return sample
|
||||
|
||||
|
||||
class Rescale(object):
    """Rescale target values to the interval [0, max_val].
    If input is constant, values are set to max_val / 2.
    """

    def __init__(self, max_val=1.0, use_mask=True):
        """Init.

        Args:
            max_val (float, optional): Max output value. Defaults to 1.0.
            use_mask (bool, optional): Only operate on valid pixels (mask == True). Defaults to True.
        """
        self.__max_val = max_val
        self.__use_mask = use_mask

    def __call__(self, sample):
        """Rescale sample["disparity"] in place and return the sample."""
        disp = sample["disparity"]

        if self.__use_mask:
            mask = sample["mask"]
        else:
            # The `np.bool` alias was removed in NumPy 1.24; the builtin
            # `bool` is the supported spelling on all NumPy versions.
            mask = np.ones_like(disp, dtype=bool)

        # Nothing to rescale when no pixel is selected.
        if np.sum(mask) == 0:
            return sample

        min_val = np.min(disp[mask])
        max_val = np.max(disp[mask])

        if max_val > min_val:
            sample["disparity"][mask] = (
                (disp[mask] - min_val) / (max_val - min_val) * self.__max_val
            )
        else:
            # Constant input: min-max scaling is undefined, use the midpoint.
            sample["disparity"][mask] = np.ones_like(
                disp[mask]) * self.__max_val / 2.0

        return sample
|
||||
|
||||
|
||||
# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
||||
class NormalizeImage(object):
    """Normalize the image with the given mean and std.
    """

    def __init__(self, mean, std):
        """Init.

        Args:
            mean: value(s) subtracted from the image.
            std: value(s) the centred image is divided by.
        """
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        centred = sample["image"] - self.__mean
        sample["image"] = centred / self.__std
        return sample
|
||||
|
||||
|
||||
class DepthToDisparity(object):
    """Replace the depth entry of a sample by its reciprocal (disparity).
    """

    def __init__(self, eps=1e-4):
        """Init.

        Args:
            eps (float, optional): Depth values below this threshold are
                treated as invalid. Defaults to 1e-4.
        """
        self.__eps = eps

    def __call__(self, sample):
        assert "depth" in sample

        depth = sample["depth"]

        # Pixels with (near-)zero depth cannot be inverted: mark them invalid.
        sample["mask"][depth < self.__eps] = False

        # Invert only where depth is large enough; the rest stays zero.
        invertible = depth >= self.__eps
        disparity = np.zeros_like(depth)
        disparity[invertible] = 1.0 / depth[invertible]
        sample["disparity"] = disparity

        del sample["depth"]

        return sample
|
||||
|
||||
|
||||
class DisparityToDepth(object):
    """Replace the disparity entry of a sample by its reciprocal (depth).
    """

    def __init__(self, eps=1e-4):
        """Init.

        Args:
            eps (float, optional): Absolute disparities below this threshold
                are treated as invalid. Defaults to 1e-4.
        """
        self.__eps = eps

    def __call__(self, sample):
        assert "disparity" in sample

        # Only the magnitude of the disparity is used.
        magnitude = np.abs(sample["disparity"])

        # Pixels with (near-)zero disparity cannot be inverted: mark invalid.
        sample["mask"][magnitude < self.__eps] = False

        # Invert only where the disparity is large enough; the rest stays zero.
        invertible = magnitude >= self.__eps
        depth = np.zeros_like(magnitude)
        depth[invertible] = 1.0 / magnitude[invertible]
        sample["depth"] = depth

        del sample["disparity"]

        return sample
|
||||
|
||||
|
||||
class PrepareForNet(object):
    """Convert the arrays of a sample into the layout fed to the network.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        # Move the last image axis to the front (axes (2, 0, 1)) and store the
        # result as contiguous float32.
        channels_first = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(channels_first).astype(np.float32)

        # Optional entries: cast to float32 and make contiguous when present.
        for key in ("mask", "disparity", "depth"):
            if key in sample:
                as_float = sample[key].astype(np.float32)
                sample[key] = np.ascontiguousarray(as_float)

        return sample
|
||||
Reference in New Issue
Block a user