提交 d4f1c8f8 编写于 作者: H Hsury

改善I/O性能

上级 2d75b032
...@@ -8,11 +8,11 @@ import math ...@@ -8,11 +8,11 @@ import math
import os import os
import re import re
import requests import requests
import struct
import threading import threading
import time import time
import types import types
from bilibili import Bilibili from bilibili import Bilibili
from PIL import Image
def log(message): def log(message):
print(f"[{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))}] {message}") print(f"[{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))}] {message}")
...@@ -35,32 +35,26 @@ def read_in_chunks(file_name, chunk_size=1024 * 1024): ...@@ -35,32 +35,26 @@ def read_in_chunks(file_name, chunk_size=1024 * 1024):
else: else:
return return
def image_dump(data, file_name): def bmp_header(data):
merged_data = data + b"\xff" return b"BM" \
pixel_number = math.ceil(len(merged_data) / 3) + struct.pack("<l", 14 + 40 + 8 + len(data)) \
width = math.ceil(math.sqrt(pixel_number)) + b"\x00\x00" \
height = math.ceil(pixel_number / width) + b"\x00\x00" \
image = Image.new("RGB", (width, height)) + b"\x3e\x00\x00\x00" \
image_data = [[]] + b"\x28\x00\x00\x00" \
for byte in merged_data: + struct.pack("<l", len(data)) \
if len(image_data[-1]) == 3: + b"\x01\x00\x00\x00" \
image_data[-1] = tuple(image_data[-1]) + b"\x01\x00" \
image_data.append([]) + b"\x01\x00" \
image_data[-1].append(byte) + b"\x00\x00\x00\x00" \
image_data[-1] = tuple(image_data[-1] + [0] * (3 - len(image_data[-1]))) + struct.pack("<l", math.ceil(len(data) / 8)) \
image.putdata(image_data) + b"\x00\x00\x00\x00" \
image.save(file_name) + b"\x00\x00\x00\x00" \
+ b"\x00\x00\x00\x00" \
+ b"\x00\x00\x00\x00" \
+ b"\x00\x00\x00\x00\xff\xff\xff\x00"
def image_load(file_name): def image_upload(data, cookies):
image = Image.open(file_name)
merged_data = b"".join(bytes(pixel_data) for pixel_data in image.getdata())
merged_data = merged_data.rstrip(b"\x00")
if merged_data[-1] == 255:
return merged_data[:-1]
else:
return b""
def image_upload(file_name, cookies):
url = "https://api.vc.bilibili.com/api/v1/drawImage/upload" url = "https://api.vc.bilibili.com/api/v1/drawImage/upload"
headers = { headers = {
'Origin': "https://t.bilibili.com", 'Origin': "https://t.bilibili.com",
...@@ -68,46 +62,29 @@ def image_upload(file_name, cookies): ...@@ -68,46 +62,29 @@ def image_upload(file_name, cookies):
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36", 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
} }
files = { files = {
'file_up': (file_name, open(file_name, "rb")), 'file_up': (f"{int(time.time() * 1000)}.bmp", data),
'biz': "draw", 'biz': "draw",
'category': "daily", 'category': "daily",
} }
response = requests.post(url, headers=headers, cookies=cookies, files=files).json() response = requests.post(url, headers=headers, cookies=cookies, files=files).json()
return response return response
def image_download(url, file_name=None): def image_download(url):
if file_name is None: response = requests.get(url)
file_name = url.split("/")[-1] return response.content
with open(file_name, "wb") as f:
response = requests.get(url, stream=True)
length = response.headers.get("content-length")
if length:
length = int(length)
receive = 0
for data in response.iter_content(chunk_size=100 * 1024):
f.write(data)
receive += len(data)
# percent = receive / length
# print(f"\r{file_name} [{'=' * int(50 * percent)}{' ' * (50 - int(50 * percent))}] {percent:.0%}", end="", flush=True)
# print()
else:
f.write(response.content)
return file_name
def fetch_meta(string): def fetch_meta(string):
if string.startswith("http://") or string.startswith("https://"): if string.startswith("http://") or string.startswith("https://"):
meta_file_name = image_download(string) full_meta = image_download(string)
elif re.match(r"^[a-fA-F0-9]{40}$", string): elif re.match(r"^[a-fA-F0-9]{40}$", string):
meta_file_name = image_download(f"http://i0.hdslb.com/bfs/album/{string}.png") full_meta = image_download(f"http://i0.hdslb.com/bfs/album/{string}.x-ms-bmp")
else: else:
meta_file_name = string return None
try: try:
meta_data = json.loads(image_load(meta_file_name).decode("utf-8")) meta_dict = json.loads(full_meta[62:].decode("utf-8"))
return meta_data return meta_dict
except: except:
return None return None
finally:
os.remove(meta_file_name)
def login_handle(args): def login_handle(args):
bilibili = Bilibili() bilibili = Bilibili()
...@@ -117,41 +94,41 @@ def login_handle(args): ...@@ -117,41 +94,41 @@ def login_handle(args):
f.write(json.dumps(bilibili.get_cookies(), ensure_ascii=False, indent=2)) f.write(json.dumps(bilibili.get_cookies(), ensure_ascii=False, indent=2))
def info_handle(args): def info_handle(args):
meta_data = fetch_meta(args.meta) meta_dict = fetch_meta(args.meta)
if meta_data: if meta_dict:
log(f"文件名: {meta_data['filename']}") log(f"文件名: {meta_dict['filename']}")
log(f"大小: {meta_data['size'] / 1024 / 1024:.2f} MB") log(f"大小: {meta_dict['size'] / 1024 / 1024:.2f} MB")
log(f"SHA-1: {meta_data['sha1']}") log(f"SHA-1: {meta_dict['sha1']}")
log(f"上传时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_data['time']))}") log(f"上传时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))}")
log(f"分块数: {len(meta_data['block'])}") log(f"分块数: {len(meta_dict['block'])}")
for index, block in enumerate(meta_data['block']): for index, block_dict in enumerate(meta_dict['block']):
log(f"分块{index} ({block['size'] / 1024 / 1024:.2f} MB) URL: {block['url']}") log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) URL: {block_dict['url']}")
else: else:
log("元数据解析出错") log("元数据解析出错")
def upload_handle(args): def upload_handle(args):
def core(index, block): def core(index, block):
block_file_name = f"{sha1}_{index}.png" block_sha1 = calc_sha1(block, hexdigest=True)
image_dump(block, block_file_name) full_block = bmp_header(block) + block
block_sha1 = calc_sha1(read_in_chunks(block_file_name), hexdigest=True) full_block_sha1 = calc_sha1(full_block, hexdigest=True)
url = skippable(block_sha1) url = skippable(full_block_sha1)
if url: if url:
log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 已存在于服务器") log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 已存在于服务器")
block_dict[index] = { block_dict[index] = {
'url': url, 'url': url,
'size': os.path.getsize(block_file_name), 'size': len(block),
'sha1': block_sha1, 'sha1': block_sha1,
} }
done_flag.release() done_flag.release()
else: else:
for _ in range(3): for _ in range(3):
response = image_upload(block_file_name, cookies) response = image_upload(full_block, cookies)
if response['code'] == 0: if response['code'] == 0:
url = response['data']['image_url'] url = response['data']['image_url']
log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 已上传") log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 已上传")
block_dict[index] = { block_dict[index] = {
'url': url, 'url': url,
'size': os.path.getsize(block_file_name), 'size': len(block),
'sha1': block_sha1, 'sha1': block_sha1,
} }
done_flag.release() done_flag.release()
...@@ -162,17 +139,17 @@ def upload_handle(args): ...@@ -162,17 +139,17 @@ def upload_handle(args):
break break
else: else:
terminate_flag.set() terminate_flag.set()
log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 上传失败, 服务器返回{response}") log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 上传失败, 服务器返回{response}")
os.remove(block_file_name)
def skippable(sha1): def skippable(sha1):
url = f"http://i0.hdslb.com/bfs/album/{sha1}.png" url = f"http://i0.hdslb.com/bfs/album/{sha1}.x-ms-bmp"
response = requests.head(url) response = requests.head(url)
return url if response.status_code == 200 else None return url if response.status_code == 200 else None
done_flag = threading.Semaphore(0) done_flag = threading.Semaphore(0)
terminate_flag = threading.Event() terminate_flag = threading.Event()
thread_pool = [] thread_pool = []
block_dict = {}
start_time = time.time() start_time = time.time()
try: try:
with open(args.cookies_file, "r", encoding="utf-8") as f: with open(args.cookies_file, "r", encoding="utf-8") as f:
...@@ -181,12 +158,11 @@ def upload_handle(args): ...@@ -181,12 +158,11 @@ def upload_handle(args):
log("Cookies加载失败, 请先登录") log("Cookies加载失败, 请先登录")
return None return None
file_name = args.file file_name = args.file
block_dict = {} log(f"上传: {os.path.basename(file_name)} ({os.path.getsize(file_name) / 1024 / 1024:.2f} MB)")
log(f"上传: {file_name} ({os.path.getsize(file_name) / 1024 / 1024:.2f} MB)")
sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True) sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True)
log(f"SHA-1: {sha1}") log(f"SHA-1: {sha1}")
log(f"线程数: {args.thread}") log(f"线程数: {args.thread}")
for index, block in enumerate(read_in_chunks(file_name, chunk_size=args.block_size * 1024 * 1024 - 1)): for index, block in enumerate(read_in_chunks(file_name, chunk_size=args.block_size * 1024 * 1024)):
if len(thread_pool) >= args.thread: if len(thread_pool) >= args.thread:
done_flag.acquire() done_flag.acquire()
if not terminate_flag.is_set(): if not terminate_flag.is_set():
...@@ -198,68 +174,78 @@ def upload_handle(args): ...@@ -198,68 +174,78 @@ def upload_handle(args):
thread.join() thread.join()
if terminate_flag.is_set(): if terminate_flag.is_set():
return None return None
meta_data = { meta_dict = {
'time': int(time.time()), 'time': int(time.time()),
'filename': file_name, 'filename': os.path.basename(file_name),
'size': os.path.getsize(file_name), 'size': os.path.getsize(file_name),
'sha1': sha1, 'sha1': sha1,
'block': [block_dict[i] for i in range(len(block_dict))], 'block': [block_dict[i] for i in range(len(block_dict))],
} }
meta_file_name = f"{sha1}_meta.png" meta = json.dumps(meta_dict, ensure_ascii=False).encode("utf-8")
image_dump(json.dumps(meta_data, ensure_ascii=False).encode("utf-8"), meta_file_name) full_meta = bmp_header(meta) + meta
for _ in range(3): for _ in range(3):
response = image_upload(meta_file_name, cookies) response = image_upload(full_meta, cookies)
if response['code'] == 0: if response['code'] == 0:
url = response['data']['image_url'] url = response['data']['image_url']
log("元数据已上传") log("元数据已上传")
os.remove(meta_file_name) log(f"{os.path.basename(file_name)}上传完毕, 共有{len(meta_dict['block'])}个分块, 用时{int(time.time() - start_time)}秒, 平均速度{meta_dict['size'] / 1024 / 1024 / (time.time() - start_time):.2f} MB/s")
log(f"{file_name}上传完毕, 共有{index + 1}个分块, 耗时{int(time.time() - start_time)}秒") log(f"META: {re.findall(r'[a-fA-F0-9]{40}', url)[0] if re.match(r'^http(s?)://i0.hdslb.com/bfs/album/[a-fA-F0-9]{40}.x-ms-bmp$', url) else url}")
log(f"META: {re.findall(r'[a-fA-F0-9]{40}', url)[0] if re.match(r'^http(s?)://i0.hdslb.com/bfs/album/[a-fA-F0-9]{40}.png$', url) else url}")
return url return url
else: else:
log(f"元数据上传失败, 保留文件{meta_file_name}, 服务器返回{response}") log(f"元数据上传失败, 服务器返回{response}")
return meta_file_name return None
def download_handle(args): def download_handle(args):
def core(index, block): def core(index, block_dict, f):
block_file_name = f"{meta_data['sha1']}_{index}.png" for _ in range(3):
if os.path.exists(block_file_name) and calc_sha1(read_in_chunks(block_file_name), hexdigest=True) == block['sha1']: block = image_download(block_dict['url'])[62:]
log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 已存在于本地") if calc_sha1(block, hexdigest=True) == block_dict['sha1']:
block_file_name_dict[index] = block_file_name f.seek(block_offset(index))
done_flag.release() f.write(block)
log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 已下载")
done_flag.release()
break
else: else:
for _ in range(3): terminate_flag.set()
image_download(block['url'], file_name=block_file_name) log(f"分块{index}校验未通过, SHA-1与元数据中的记录{block_dict['sha1']}不匹配")
if calc_sha1(read_in_chunks(block_file_name), hexdigest=True) == block['sha1']: return
log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 已下载")
block_file_name_dict[index] = block_file_name def block_offset(index):
done_flag.release() return sum(meta_dict['block'][i]['size'] for i in range(index))
break
else:
terminate_flag.set()
log(f"分块{index}校验未通过, SHA-1与元数据中的记录{block['sha1']}不匹配")
os.remove(block_file_name)
return
done_flag = threading.Semaphore(0) done_flag = threading.Semaphore(0)
terminate_flag = threading.Event() terminate_flag = threading.Event()
thread_pool = [] thread_pool = []
block_file_name_dict = {} download_block_list = []
start_time = time.time() start_time = time.time()
meta_data = fetch_meta(args.meta) meta_dict = fetch_meta(args.meta)
if meta_data: if meta_dict:
file_name = args.file if args.file else meta_data['filename'] file_name = args.file if args.file else meta_dict['filename']
log(f"下载: {file_name} ({meta_data['size'] / 1024 / 1024:.2f} MB), 共有{len(meta_data['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_data['time']))}") log(f"下载: {file_name} ({meta_dict['size'] / 1024 / 1024:.2f} MB), 共有{len(meta_dict['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))}")
else: else:
log("元数据解析出错") log("元数据解析出错")
return None return None
log(f"线程数: {args.thread}") log(f"线程数: {args.thread}")
if not (os.path.exists(file_name) and calc_sha1(read_in_chunks(file_name), hexdigest=True) == meta_data['sha1']): if os.path.exists(file_name) and os.path.getsize(file_name) == meta_dict['size']:
for index, block in enumerate(meta_data['block']): if calc_sha1(read_in_chunks(file_name), hexdigest=True) == meta_dict['sha1']:
log(f"{file_name}已存在于本地")
return file_name
else:
with open(file_name, "rb") as f:
for index, block_dict in enumerate(meta_dict['block']):
f.seek(block_offset(index))
if calc_sha1(f.read(block_dict['size']), hexdigest=True) == block_dict['sha1']:
log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 已存在于本地")
else:
download_block_list.append(index)
else:
download_block_list = list(range(len(meta_dict['block'])))
with open(file_name, "r+b" if os.path.exists(file_name) else "wb") as f:
for index in download_block_list:
if len(thread_pool) >= args.thread: if len(thread_pool) >= args.thread:
done_flag.acquire() done_flag.acquire()
if not terminate_flag.is_set(): if not terminate_flag.is_set():
thread_pool.append(threading.Thread(target=core, args=(index, block))) thread_pool.append(threading.Thread(target=core, args=(index, meta_dict['block'][index], f)))
thread_pool[-1].start() thread_pool[-1].start()
else: else:
log("已终止下载, 等待线程回收") log("已终止下载, 等待线程回收")
...@@ -267,25 +253,19 @@ def download_handle(args): ...@@ -267,25 +253,19 @@ def download_handle(args):
thread.join() thread.join()
if terminate_flag.is_set(): if terminate_flag.is_set():
return None return None
with open(file_name, "wb") as f: f.truncate(sum(block['size'] for block in meta_dict['block']))
for index in range(len(meta_data['block'])): sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True)
block_file_name = block_file_name_dict[index] log(f"SHA-1: {sha1}")
f.write(image_load(block_file_name)) if sha1 == meta_dict['sha1']:
os.remove(block_file_name) log(f"{file_name}校验通过")
sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True) log(f"{file_name}下载完毕, 用时{int(time.time() - start_time)}秒, 平均速度{meta_dict['size'] / 1024 / 1024 / (time.time() - start_time):.2f} MB/s")
log(f"SHA-1: {sha1}") return file_name
if sha1 == meta_data['sha1']:
log(f"{file_name}校验通过")
log(f"{file_name}下载完毕, 耗时{int(time.time() - start_time)}秒")
return file_name
else:
log(f"{file_name}校验未通过, SHA-1与元数据中的记录{meta_data['sha1']}不匹配")
return None
else: else:
log(f"{file_name}已存在于本地") log(f"{file_name}校验未通过, SHA-1与元数据中的记录{meta_dict['sha1']}不匹配")
return None
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(prog="BiliDrive", description="Bilibili Drive", epilog="By Hsury, 2019/10/24") parser = argparse.ArgumentParser(prog="BiliDrive", description="Bilibili Drive", epilog="By Hsury, 2019/10/25")
parser.add_argument("-c", "--cookies-file", default="cookies.json", help="cookies json file name") parser.add_argument("-c", "--cookies-file", default="cookies.json", help="cookies json file name")
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册