{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"multimodal_data_info_file_path ='multimodal_data_info.json'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def read_multimodal_data_information_json_file(json_file_path=\"multimodal_data_info.json\"):\n",
" \"\"\"\n",
" :param json_file_path:\n",
" :return: multimodal_data_information_list\n",
" [{'mp4_id': '97930081', 'mp4_download_url': ...'video_label': 'Military'},\n",
" {'mp4_id': '64413672', 'mp4_download_url': ... 'video_label': 'Military'}]\n",
" \"\"\"\n",
" def check_data(line_dict):\n",
" for item in ['mp4_id', 'video_label', 'mp4_time', 'mp4_download_url', 'mp4_background_image_url', 'mp4_txt_brief']:\n",
" if item not in line_dict:\n",
" return False\n",
" return True\n",
" \n",
" multimodal_data_information_list = list()\n",
" with open(json_file_path, 'r', encoding='utf-8') as f:\n",
" try:\n",
" while True:\n",
" line = f.readline()\n",
" if line:\n",
" line_dict = json.loads(line)\n",
" if check_data(line_dict):\n",
" multimodal_data_information_list.append(line_dict)\n",
" else:\n",
" print(\"incomplete data:\")\n",
" print(line_dict)\n",
" else:\n",
" break\n",
" except:\n",
" f.close()\n",
" return multimodal_data_information_list"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"multimodal_data_information_list = read_multimodal_data_information_json_file(multimodal_data_info_file_path)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"562342"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(multimodal_data_information_list)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'mp4_id': '75265848',\n",
" 'mp4_download_url': 'https://p5-v1.xpccdn.com/075265848_main_xl.mp4',\n",
" 'mp4_time': '0:13',\n",
" 'mp4_background_image_url': 'https://p5-i1.xpccdn.com/075265848_iconl.jpeg',\n",
" 'mp4_txt_brief': ' Old antique German military rifle',\n",
" 'video_label': 'Military'},\n",
" {'mp4_id': '44566064',\n",
" 'mp4_download_url': 'https://p5-v1.xpccdn.com/044566064_main_xl.mp4',\n",
" 'mp4_time': '0:09',\n",
" 'mp4_background_image_url': 'https://p5-i1.xpccdn.com/044566064_iconl.jpeg',\n",
" 'mp4_txt_brief': ' quadcopter aerial drone',\n",
" 'video_label': 'Military'},\n",
" {'mp4_id': '62447549',\n",
" 'mp4_download_url': 'https://p5-v1.xpccdn.com/062447549_main_xl.mp4',\n",
" 'mp4_time': '0:06',\n",
" 'mp4_background_image_url': 'https://p5-i1.xpccdn.com/062447549_iconl.jpeg',\n",
" 'mp4_txt_brief': ' Firearm dis-assembly for cleaning and safety check of handheld gun',\n",
" 'video_label': 'Military'}]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"multimodal_data_information_list[:3]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def multimodal_data_json_file_to_datafram(json_file_path=\"multimodal_data_info.json\"):\n",
" \"\"\"\n",
" :param json_file_path: \n",
" :return: pandas.datafram\n",
" \"\"\"\n",
" multimodal_data_information_list = read_multimodal_data_information_json_file(json_file_path)\n",
" \n",
" multimodal_data_information_dict = {'mp4_id':[], 'video_label':[], 'mp4_time':[], \n",
" 'mp4_download_url':[], 'mp4_background_image_url':[], 'mp4_txt_brief':[]}\n",
" \n",
" for data in multimodal_data_information_list:\n",
" multimodal_data_information_dict['mp4_id'].append(data['mp4_id'])\n",
" multimodal_data_information_dict['video_label'].append(data['video_label'])\n",
" multimodal_data_information_dict['mp4_time'].append(data['mp4_time'])\n",
" multimodal_data_information_dict['mp4_download_url'].append(data['mp4_download_url'])\n",
" multimodal_data_information_dict['mp4_background_image_url'].append(data['mp4_background_image_url'])\n",
" multimodal_data_information_dict['mp4_txt_brief'].append(data['mp4_txt_brief'])\n",
" \n",
" multimodal_data_information_datafram = pd.DataFrame(multimodal_data_information_dict)\n",
" \n",
" return multimodal_data_information_datafram"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"multimodal_data_information_datafram = multimodal_data_json_file_to_datafram(json_file_path=\"multimodal_data_info.json\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mp4_id | \n",
" video_label | \n",
" mp4_time | \n",
" mp4_download_url | \n",
" mp4_background_image_url | \n",
" mp4_txt_brief | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 75265848 | \n",
" Military | \n",
" 0:13 | \n",
" https://p5-v1.xpccdn.com/075265848_main_xl.mp4 | \n",
" https://p5-i1.xpccdn.com/075265848_iconl.jpeg | \n",
" Old antique German military rifle | \n",
"
\n",
" \n",
" 1 | \n",
" 44566064 | \n",
" Military | \n",
" 0:09 | \n",
" https://p5-v1.xpccdn.com/044566064_main_xl.mp4 | \n",
" https://p5-i1.xpccdn.com/044566064_iconl.jpeg | \n",
" quadcopter aerial drone | \n",
"
\n",
" \n",
" 2 | \n",
" 62447549 | \n",
" Military | \n",
" 0:06 | \n",
" https://p5-v1.xpccdn.com/062447549_main_xl.mp4 | \n",
" https://p5-i1.xpccdn.com/062447549_iconl.jpeg | \n",
" Firearm dis-assembly for cleaning and safety ... | \n",
"
\n",
" \n",
" 3 | \n",
" 42966432 | \n",
" Military | \n",
" 0:08 | \n",
" https://p5-v1.xpccdn.com/042966432_main_xl.mp4 | \n",
" https://p5-i1.xpccdn.com/042966432_iconl.jpeg | \n",
" Kalashnikov deadly weapon | \n",
"
\n",
" \n",
" 4 | \n",
" 103424272 | \n",
" Military | \n",
" 0:13 | \n",
" https://p5-v1.xpccdn.com/103424272_main_xl.mp4 | \n",
" https://p5-i1.xpccdn.com/103424272_iconl.jpeg | \n",
" Rows of ammunition in front of an animated Le... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mp4_id video_label mp4_time \\\n",
"0 75265848 Military 0:13 \n",
"1 44566064 Military 0:09 \n",
"2 62447549 Military 0:06 \n",
"3 42966432 Military 0:08 \n",
"4 103424272 Military 0:13 \n",
"\n",
" mp4_download_url \\\n",
"0 https://p5-v1.xpccdn.com/075265848_main_xl.mp4 \n",
"1 https://p5-v1.xpccdn.com/044566064_main_xl.mp4 \n",
"2 https://p5-v1.xpccdn.com/062447549_main_xl.mp4 \n",
"3 https://p5-v1.xpccdn.com/042966432_main_xl.mp4 \n",
"4 https://p5-v1.xpccdn.com/103424272_main_xl.mp4 \n",
"\n",
" mp4_background_image_url \\\n",
"0 https://p5-i1.xpccdn.com/075265848_iconl.jpeg \n",
"1 https://p5-i1.xpccdn.com/044566064_iconl.jpeg \n",
"2 https://p5-i1.xpccdn.com/062447549_iconl.jpeg \n",
"3 https://p5-i1.xpccdn.com/042966432_iconl.jpeg \n",
"4 https://p5-i1.xpccdn.com/103424272_iconl.jpeg \n",
"\n",
" mp4_txt_brief \n",
"0 Old antique German military rifle \n",
"1 quadcopter aerial drone \n",
"2 Firearm dis-assembly for cleaning and safety ... \n",
"3 Kalashnikov deadly weapon \n",
"4 Rows of ammunition in front of an animated Le... "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"multimodal_data_information_datafram.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mp4_id | \n",
" video_label | \n",
" mp4_time | \n",
" mp4_download_url | \n",
" mp4_background_image_url | \n",
" mp4_txt_brief | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 562342 | \n",
" 562342 | \n",
" 562342 | \n",
" 562342 | \n",
" 562342 | \n",
" 562342 | \n",
"
\n",
" \n",
" unique | \n",
" 499607 | \n",
" 31 | \n",
" 184 | \n",
" 499607 | \n",
" 499607 | \n",
" 343020 | \n",
"
\n",
" \n",
" top | \n",
" 88460884 | \n",
" Alpha Channel | \n",
" 0:10 | \n",
" https://p5-v1.xpccdn.com/023726153_main_xl.mp4 | \n",
" https://p5-i1.xpccdn.com/088460884_iconl.jpeg | \n",
" Intro Background Texture Render Animation Col... | \n",
"
\n",
" \n",
" freq | \n",
" 9 | \n",
" 19200 | \n",
" 49660 | \n",
" 9 | \n",
" 9 | \n",
" 10974 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mp4_id video_label mp4_time \\\n",
"count 562342 562342 562342 \n",
"unique 499607 31 184 \n",
"top 88460884 Alpha Channel 0:10 \n",
"freq 9 19200 49660 \n",
"\n",
" mp4_download_url \\\n",
"count 562342 \n",
"unique 499607 \n",
"top https://p5-v1.xpccdn.com/023726153_main_xl.mp4 \n",
"freq 9 \n",
"\n",
" mp4_background_image_url \\\n",
"count 562342 \n",
"unique 499607 \n",
"top https://p5-i1.xpccdn.com/088460884_iconl.jpeg \n",
"freq 9 \n",
"\n",
" mp4_txt_brief \n",
"count 562342 \n",
"unique 343020 \n",
"top Intro Background Texture Render Animation Col... \n",
"freq 10974 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"multimodal_data_information_datafram.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}