1 change: 1 addition & 0 deletions README.md
@@ -10,6 +10,7 @@
<a href="https://arxiv.org/abs/2401.10891"><img src='https://img.shields.io/badge/arXiv-Depth Anything-red' alt='Paper PDF'></a>
<a href='https://depth-anything.github.io'><img src='https://img.shields.io/badge/Project_Page-Depth Anything-green' alt='Project Page'></a>
<a href='https://huggingface.co/spaces/LiheYoung/Depth-Anything'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
[![Replicate](https://replicate.com/cjwbw/depth-anything/badge)](https://replicate.com/cjwbw/depth-anything)
</div>

This work presents Depth Anything, a highly practical solution for robust monocular depth estimation by training on a combination of 1.5M labeled images and **62M+ unlabeled images**.
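The badge added above links to a hosted demo of this predictor on Replicate. As a rough illustration (not part of this PR), the hosted model can be invoked from Python with the official `replicate` client; `demo.jpg` is a placeholder file, and depending on client version the model reference may need an explicit `:version` suffix:

```python
# Hypothetical usage of the hosted model behind the badge (not in this PR).
# Requires `pip install replicate` and REPLICATE_API_TOKEN in the environment.
import replicate

output = replicate.run(
    "cjwbw/depth-anything",  # may need a ":version" suffix
    input={"image": open("demo.jpg", "rb"), "encoder": "vitl"},
)
print(output)  # typically a URL to the colormapped depth PNG
```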
16 changes: 16 additions & 0 deletions cog.yaml
@@ -0,0 +1,16 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  system_packages:
    - "libgl1-mesa-glx"
    - "libglib2.0-0"
  python_version: "3.11"
  python_packages:
    - "opencv-python==4.9.0.80"
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "tqdm==4.66.1"
    - "huggingface_hub==0.20.3"
predict: "predict.py:Predictor"
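The `predict` entry points Cog at the `Predictor` class defined in predict.py below. Roughly, and leaving out the HTTP server the real `cog` runtime wraps around it, the container lifecycle amounts to the following sketch (`demo.jpg` is a placeholder input):

```python
# Simplified sketch of how Cog drives predict.py (the real runtime wraps
# this in an HTTP server; see the Cog docs linked above).
from predict import Predictor

predictor = Predictor()
predictor.setup()  # runs once, at container start
result = predictor.predict(image="demo.jpg", encoder="vitl")  # per request
print(result)  # /tmp/out.png
```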
85 changes: 85 additions & 0 deletions predict.py
@@ -0,0 +1,85 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose
from cog import BasePredictor, Input, Path

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        encoder_options = ["vits", "vitb", "vitl"]
        self.device = "cuda:0"
        model_cache = "model_cache"
        self.models = {
            k: DepthAnything.from_pretrained(
                f"LiheYoung/depth_anything_{k}14", cache_dir=model_cache
            ).to(self.device)
            for k in encoder_options
        }
        self.total_params = {
            k: sum(param.numel() for param in self.models[k].parameters())
            for k in encoder_options
        }

        self.transform = Compose(
            [
                Resize(
                    width=518,
                    height=518,
                    resize_target=False,
                    keep_aspect_ratio=True,
                    ensure_multiple_of=14,
                    resize_method="lower_bound",
                    image_interpolation_method=cv2.INTER_CUBIC,
                ),
                NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                PrepareForNet(),
            ]
        )

    def predict(
        self,
        image: Path = Input(description="Input image"),
        encoder: str = Input(
            description="Choose an encoder.",
            default="vitl",
            choices=["vits", "vitb", "vitl"],
        ),
    ) -> Path:
        """Run a single prediction on the model"""
        depth_anything = self.models[encoder]
        total_params = self.total_params[encoder]
        print("Total parameters: {:.2f}M".format(total_params / 1e6))

        depth_anything.eval()

        raw_image = cv2.imread(str(image))
        image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        h, w = image.shape[:2]

        image = self.transform({"image": image})["image"]
        image = torch.from_numpy(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            depth = depth_anything(image)

        depth = F.interpolate(
            depth[None], (h, w), mode="bilinear", align_corners=False
        )[0, 0]
        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

        depth = depth.cpu().numpy().astype(np.uint8)
        depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
        output_path = "/tmp/out.png"
        cv2.imwrite(output_path, depth_color)

        return Path(output_path)
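A note on the output: the network predicts relative depth, and `predict()` min-max normalizes it to 8 bits purely for the INFERNO color preview. A hypothetical variant (not part of this PR) that preserves more precision would write the normalized depth as a 16-bit grayscale PNG instead:

```python
# Hypothetical alternative output (not in this PR): 16-bit grayscale depth.
import cv2
import numpy as np

def save_depth16(depth_float: np.ndarray, path: str = "/tmp/out16.png") -> str:
    """Min-max normalize relative depth to uint16 and write a PNG."""
    d = depth_float - depth_float.min()
    d = d / max(float(d.max()), 1e-8) * 65535.0  # guard against a flat map
    cv2.imwrite(path, d.astype(np.uint16))  # OpenCV supports 16-bit PNGs
    return path
```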