{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import json" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "multimodal_data_info_file_path ='multimodal_data_info.json'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def read_multimodal_data_information_json_file(json_file_path=\"multimodal_data_info.json\"):\n", " \"\"\"\n", " :param json_file_path:\n", " :return: multimodal_data_information_list\n", " [{'mp4_id': '97930081', 'mp4_download_url': ...'video_label': 'Military'},\n", " {'mp4_id': '64413672', 'mp4_download_url': ... 'video_label': 'Military'}]\n", " \"\"\"\n", " def check_data(line_dict):\n", " for item in ['mp4_id', 'video_label', 'mp4_time', 'mp4_download_url', 'mp4_background_image_url', 'mp4_txt_brief']:\n", " if item not in line_dict:\n", " return False\n", " return True\n", " \n", " multimodal_data_information_list = list()\n", " with open(json_file_path, 'r', encoding='utf-8') as f:\n", " try:\n", " while True:\n", " line = f.readline()\n", " if line:\n", " line_dict = json.loads(line)\n", " if check_data(line_dict):\n", " multimodal_data_information_list.append(line_dict)\n", " else:\n", " print(\"incomplete data:\")\n", " print(line_dict)\n", " else:\n", " break\n", " except:\n", " f.close()\n", " return multimodal_data_information_list" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "multimodal_data_information_list = read_multimodal_data_information_json_file(multimodal_data_info_file_path)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "562342" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(multimodal_data_information_list)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'mp4_id': '75265848',\n", " 'mp4_download_url': 'https://p5-v1.xpccdn.com/075265848_main_xl.mp4',\n", " 'mp4_time': '0:13',\n", " 'mp4_background_image_url': 'https://p5-i1.xpccdn.com/075265848_iconl.jpeg',\n", " 'mp4_txt_brief': ' Old antique German military rifle',\n", " 'video_label': 'Military'},\n", " {'mp4_id': '44566064',\n", " 'mp4_download_url': 'https://p5-v1.xpccdn.com/044566064_main_xl.mp4',\n", " 'mp4_time': '0:09',\n", " 'mp4_background_image_url': 'https://p5-i1.xpccdn.com/044566064_iconl.jpeg',\n", " 'mp4_txt_brief': ' quadcopter aerial drone',\n", " 'video_label': 'Military'},\n", " {'mp4_id': '62447549',\n", " 'mp4_download_url': 'https://p5-v1.xpccdn.com/062447549_main_xl.mp4',\n", " 'mp4_time': '0:06',\n", " 'mp4_background_image_url': 'https://p5-i1.xpccdn.com/062447549_iconl.jpeg',\n", " 'mp4_txt_brief': ' Firearm dis-assembly for cleaning and safety check of handheld gun',\n", " 'video_label': 'Military'}]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multimodal_data_information_list[:3]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def multimodal_data_json_file_to_datafram(json_file_path=\"multimodal_data_info.json\"):\n", " \"\"\"\n", " :param json_file_path: \n", " :return: pandas.datafram\n", " \"\"\"\n", " multimodal_data_information_list = read_multimodal_data_information_json_file(json_file_path)\n", " \n", " multimodal_data_information_dict = {'mp4_id':[], 'video_label':[], 'mp4_time':[], \n", " 'mp4_download_url':[], 'mp4_background_image_url':[], 'mp4_txt_brief':[]}\n", " \n", " for data in multimodal_data_information_list:\n", " multimodal_data_information_dict['mp4_id'].append(data['mp4_id'])\n", " multimodal_data_information_dict['video_label'].append(data['video_label'])\n", " multimodal_data_information_dict['mp4_time'].append(data['mp4_time'])\n", " multimodal_data_information_dict['mp4_download_url'].append(data['mp4_download_url'])\n", " multimodal_data_information_dict['mp4_background_image_url'].append(data['mp4_background_image_url'])\n", " multimodal_data_information_dict['mp4_txt_brief'].append(data['mp4_txt_brief'])\n", " \n", " multimodal_data_information_datafram = pd.DataFrame(multimodal_data_information_dict)\n", " \n", " return multimodal_data_information_datafram" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "multimodal_data_information_datafram = multimodal_data_json_file_to_datafram(json_file_path=\"multimodal_data_info.json\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mp4_idvideo_labelmp4_timemp4_download_urlmp4_background_image_urlmp4_txt_brief
075265848Military0:13https://p5-v1.xpccdn.com/075265848_main_xl.mp4https://p5-i1.xpccdn.com/075265848_iconl.jpegOld antique German military rifle
144566064Military0:09https://p5-v1.xpccdn.com/044566064_main_xl.mp4https://p5-i1.xpccdn.com/044566064_iconl.jpegquadcopter aerial drone
262447549Military0:06https://p5-v1.xpccdn.com/062447549_main_xl.mp4https://p5-i1.xpccdn.com/062447549_iconl.jpegFirearm dis-assembly for cleaning and safety ...
342966432Military0:08https://p5-v1.xpccdn.com/042966432_main_xl.mp4https://p5-i1.xpccdn.com/042966432_iconl.jpegKalashnikov deadly weapon
4103424272Military0:13https://p5-v1.xpccdn.com/103424272_main_xl.mp4https://p5-i1.xpccdn.com/103424272_iconl.jpegRows of ammunition in front of an animated Le...
\n", "
" ], "text/plain": [ " mp4_id video_label mp4_time \\\n", "0 75265848 Military 0:13 \n", "1 44566064 Military 0:09 \n", "2 62447549 Military 0:06 \n", "3 42966432 Military 0:08 \n", "4 103424272 Military 0:13 \n", "\n", " mp4_download_url \\\n", "0 https://p5-v1.xpccdn.com/075265848_main_xl.mp4 \n", "1 https://p5-v1.xpccdn.com/044566064_main_xl.mp4 \n", "2 https://p5-v1.xpccdn.com/062447549_main_xl.mp4 \n", "3 https://p5-v1.xpccdn.com/042966432_main_xl.mp4 \n", "4 https://p5-v1.xpccdn.com/103424272_main_xl.mp4 \n", "\n", " mp4_background_image_url \\\n", "0 https://p5-i1.xpccdn.com/075265848_iconl.jpeg \n", "1 https://p5-i1.xpccdn.com/044566064_iconl.jpeg \n", "2 https://p5-i1.xpccdn.com/062447549_iconl.jpeg \n", "3 https://p5-i1.xpccdn.com/042966432_iconl.jpeg \n", "4 https://p5-i1.xpccdn.com/103424272_iconl.jpeg \n", "\n", " mp4_txt_brief \n", "0 Old antique German military rifle \n", "1 quadcopter aerial drone \n", "2 Firearm dis-assembly for cleaning and safety ... \n", "3 Kalashnikov deadly weapon \n", "4 Rows of ammunition in front of an animated Le... " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multimodal_data_information_datafram.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mp4_idvideo_labelmp4_timemp4_download_urlmp4_background_image_urlmp4_txt_brief
count562342562342562342562342562342562342
unique49960731184499607499607343020
top88460884Alpha Channel0:10https://p5-v1.xpccdn.com/023726153_main_xl.mp4https://p5-i1.xpccdn.com/088460884_iconl.jpegIntro Background Texture Render Animation Col...
freq919200496609910974
\n", "
" ], "text/plain": [ " mp4_id video_label mp4_time \\\n", "count 562342 562342 562342 \n", "unique 499607 31 184 \n", "top 88460884 Alpha Channel 0:10 \n", "freq 9 19200 49660 \n", "\n", " mp4_download_url \\\n", "count 562342 \n", "unique 499607 \n", "top https://p5-v1.xpccdn.com/023726153_main_xl.mp4 \n", "freq 9 \n", "\n", " mp4_background_image_url \\\n", "count 562342 \n", "unique 499607 \n", "top https://p5-i1.xpccdn.com/088460884_iconl.jpeg \n", "freq 9 \n", "\n", " mp4_txt_brief \n", "count 562342 \n", "unique 343020 \n", "top Intro Background Texture Render Animation Col... \n", "freq 10974 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multimodal_data_information_datafram.describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }