
Commit 49ae03e

Merge pull request #40 from jaelgu/main
Add showcase in reverse_video_search notebook
2 parents 7966921 + 47b5600

File tree

1 file changed (+97, -20 lines)

video/reverse_video_search/reverse_video_search.ipynb

Lines changed: 97 additions & 20 deletions
@@ -37,8 +37,10 @@
 "| -- |\n",
 "| pymilvus |\n",
 "| towhee |\n",
+"| towhee.models |\n",
 "| torch |\n",
-"| torchvision |"
+"| torchvision |\n",
+"| gradio |"
 ]
 },
 {
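The hunk above extends the notebook's requirements table. For anyone reproducing it, a one-line install covering the updated table might look like the sketch below; the package names come straight from the table, but leaving versions unpinned is an assumption:

```python
# One-shot install of the packages from the requirements table (run in the notebook).
# Versions are intentionally unpinned here; pin them if you need reproducibility.
!python -m pip install -q pymilvus towhee towhee.models torch torchvision gradio
```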
@@ -149,7 +151,6 @@
 "metadata": {},
 "source": [
 "To make it easier to fetch videos & measure results in later steps, we build some helper functions in advance:\n",
-"- **read_videos:** get video paths by video ids;\n",
 "- **ground_truth:** get ground-truth video ids for a query video from its path"
 ]
 },
@@ -168,13 +169,6 @@
 "# print(id_video)\n",
 "# print(label_ids)\n",
 "\n",
-"def read_videos(results):\n",
-"    videos = []\n",
-"    for re in results:\n",
-"        path = id_video[re.id]\n",
-"        videos.append(path)\n",
-"    return videos\n",
-"\n",
 "def ground_truth(path):\n",
 "    label = path.split('/')[-2]\n",
 "    return label_ids[label]"
@@ -266,7 +260,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Total insert time: 93.22s\n",
+"Total insert time: 95.91s\n",
 "Total number of inserted data is 200.\n"
 ]
 }
@@ -324,7 +318,7 @@
 "\n",
 "Now that all embeddings of candidate videos have been inserted into the Milvus collection, we can query across the collection for nearest neighbors.\n",
 "\n",
-"To get query embeddings, we go through the same pre-insert steps for each input video. Because Milvus returns video ids and vector distances, we use the `read_videos` function to get the corresponding video paths from the ids."
+"To get query embeddings, we go through the same pre-insert steps for each input video. Because Milvus returns video ids and vector distances, we use the `id_video` dictionary to get the corresponding video paths from the ids."
 ]
 },
 {
@@ -352,7 +346,7 @@
 "    .video_classification['frames', 'vec'].pytorchvideo(\n",
 "        model_name='x3d_m', predict=False, skip_preprocess=True)\n",
 "    .milvus_search['vec', 'result'](collection=collection, limit=10)\n",
-"    .runas_op['result', 'res_path'](func=read_videos)\n",
+"    .runas_op['result', 'res_path'](func=lambda res: [id_video[x.id] for x in res])\n",
 "    .select['res_path']().to_list()[0].res_path\n",
 ")"
@@ -497,7 +491,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Total search time: 10.94s\n"
+"Total search time: 10.65s\n"
 ]
 }
 ],
@@ -555,7 +549,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Total insert time: 92.52s\n",
+"Total insert time: 92.18s\n",
 "Total number of inserted data is 200.\n"
 ]
 },
@@ -613,7 +607,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Total search time: 10.78s\n"
+"Total search time: 11.75s\n"
 ]
 }
 ],
@@ -698,7 +692,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Total insert time: 151.25s\n",
+"Total insert time: 141.29s\n",
 "Total number of inserted data is 200.\n"
 ]
 },
@@ -756,7 +750,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Total search time: 24.10s\n"
+"Total search time: 17.90s\n"
 ]
 }
 ],
@@ -814,13 +808,96 @@
 "Switching to the MViT model increases the mHR to 0.79 and the mAP to 0.86, both much better than the X3D model. However, both insert and search time have increased. It's time for you to make a trade-off between latency and accuracy. You're always encouraged to play around with this tutorial."
 ]
 },
+{
+"cell_type": "markdown",
+"id": "607783a1",
+"metadata": {},
+"source": [
+"## Release a Showcase\n",
+"\n",
+"We've learned how to build a reverse video search engine. Now it's time to add an interface and release a showcase. Towhee provides `towhee.api()` to wrap the data processing pipeline as a function with `.as_function()`, so we can build a quick demo around this `milvus_search_function` with [Gradio](https://gradio.app/)."
+]
+},
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 11,
 "id": "a78c9ba6",
 "metadata": {},
-"outputs": [],
-"source": []
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"Using cache found in /home/mengjia.gu/.cache/torch/hub/facebookresearch_pytorchvideo_main\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Running on local URL: http://127.0.0.1:7860/\n",
+"Running on public URL: https://53758.gradio.app\n",
+"\n",
+"This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)\n"
+]
+},
+{
+"data": {
+"text/html": [
+"<iframe width=\"900\" height=\"500\" src=\"https://53758.gradio.app\" frameborder=\"0\" allowfullscreen></iframe>"
+],
+"text/plain": [
+"<IPython.lib.display.IFrame at 0x7fb5c95fd8e0>"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+},
+{
+"data": {
+"text/plain": [
+"(<gradio.routes.App at 0x7fb5ccb552e0>,\n",
+" 'http://127.0.0.1:7860/',\n",
+" 'https://53758.gradio.app')"
+]
+},
+"execution_count": 11,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"import gradio\n",
+"\n",
+"with towhee.api() as api:\n",
+"    milvus_search_function = (\n",
+"        api.video_decode.ffmpeg(\n",
+"            sample_type='uniform_temporal_subsample', args={'num_samples': 32})\n",
+"        .video_classification.pytorchvideo(\n",
+"            model_name='mvit_base_32x3', predict=False, skip_preprocess=True)\n",
+"        .tensor_normalize()\n",
+"        .milvus_search(collection='mvit_base', limit=5)\n",
+"        .runas_op(func=lambda res: [id_video[x.id] for x in res])\n",
+"        .as_function()\n",
+"    )\n",
+"\n",
+"interface = gradio.Interface(milvus_search_function,\n",
+"                             inputs=gradio.Video(source='upload'),\n",
+"                             outputs=[gradio.Video(format='mp4') for _ in range(5)])\n",
+"\n",
+"interface.launch(inline=True, share=True)"
 }
 ],
 "metadata": {
