提交 2d75b032 编写于 作者: H Hsury

实现多线程上传下载与断点续传

上级 8d55af4d
此差异已折叠。
...@@ -6,7 +6,9 @@ import hashlib ...@@ -6,7 +6,9 @@ import hashlib
import json import json
import math import math
import os import os
import re
import requests import requests
import threading
import time import time
import types import types
from bilibili import Bilibili from bilibili import Bilibili
...@@ -15,14 +17,14 @@ from PIL import Image ...@@ -15,14 +17,14 @@ from PIL import Image
def log(message):
    """Print *message* to stdout, prefixed with the current local time.

    The prefix has the form ``[YYYY-MM-DD HH:MM:SS]``.
    """
    # time.strftime() formats the current local time when no time tuple is given.
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] {message}")
def calc_sha1(data, hexdigest=False):
    """Compute the SHA-1 digest of *data*.

    Args:
        data: Either a bytes-like object, or a generator yielding bytes-like
            chunks (each chunk is fed to the hash in order, so large files
            never have to be held in memory at once).
        hexdigest: When true, return the digest as a 40-character hex string;
            otherwise return the raw 20-byte digest.

    Returns:
        ``str`` if *hexdigest* is true, else ``bytes``.
    """
    sha1 = hashlib.sha1()
    if isinstance(data, types.GeneratorType):
        for chunk in data:
            sha1.update(chunk)
    else:
        sha1.update(data)
    return sha1.hexdigest() if hexdigest else sha1.digest()
def read_in_chunks(file_name, chunk_size=1024 * 1024): def read_in_chunks(file_name, chunk_size=1024 * 1024):
with open(file_name, "rb") as f: with open(file_name, "rb") as f:
...@@ -34,8 +36,7 @@ def read_in_chunks(file_name, chunk_size=1024 * 1024): ...@@ -34,8 +36,7 @@ def read_in_chunks(file_name, chunk_size=1024 * 1024):
return return
def image_dump(data, file_name): def image_dump(data, file_name):
md5 = calc_md5(data) merged_data = data + b"\xff"
merged_data = data + md5 + b"\xff"
pixel_number = math.ceil(len(merged_data) / 3) pixel_number = math.ceil(len(merged_data) / 3)
width = math.ceil(math.sqrt(pixel_number)) width = math.ceil(math.sqrt(pixel_number))
height = math.ceil(pixel_number / width) height = math.ceil(pixel_number / width)
...@@ -55,10 +56,9 @@ def image_load(file_name): ...@@ -55,10 +56,9 @@ def image_load(file_name):
merged_data = b"".join(bytes(pixel_data) for pixel_data in image.getdata()) merged_data = b"".join(bytes(pixel_data) for pixel_data in image.getdata())
merged_data = merged_data.rstrip(b"\x00") merged_data = merged_data.rstrip(b"\x00")
if merged_data[-1] == 255: if merged_data[-1] == 255:
data, md5 = merged_data[:-(1 + 16)], merged_data[-(1 + 16):-1] return merged_data[:-1]
if calc_md5(data) == md5: else:
return data return b""
return b""
def image_upload(file_name, cookies): def image_upload(file_name, cookies):
url = "https://api.vc.bilibili.com/api/v1/drawImage/upload" url = "https://api.vc.bilibili.com/api/v1/drawImage/upload"
...@@ -94,126 +94,198 @@ def image_download(url, file_name=None): ...@@ -94,126 +94,198 @@ def image_download(url, file_name=None):
f.write(response.content) f.write(response.content)
return file_name return file_name
def fetch_meta(string):
    """Load the metadata JSON embedded in a metadata image.

    Args:
        string: One of
            * an ``http://`` / ``https://`` URL of the metadata image,
            * a bare 40-hex-digit hash, expanded to
              ``http://i0.hdslb.com/bfs/album/<hash>.png``,
            * anything else, which is treated as a local image file path.

    Returns:
        The decoded JSON object, or ``None`` when the image cannot be read or
        does not contain valid UTF-8 JSON.

    The image file is removed afterwards in every case.
    NOTE(review): this also deletes a user-supplied local file, exactly as the
    original code did -- confirm that this is intended.
    """
    if string.startswith(("http://", "https://")):
        meta_file_name = image_download(string)
    elif re.match(r"^[a-fA-F0-9]{40}$", string):
        meta_file_name = image_download(f"http://i0.hdslb.com/bfs/album/{string}.png")
    else:
        meta_file_name = string
    try:
        return json.loads(image_load(meta_file_name).decode("utf-8"))
    except Exception:
        # Any load/decode/parse failure means "no usable metadata". Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return None
    finally:
        # The file may not exist (e.g. a mistyped local path); cleanup must not
        # turn the ``None`` result into a FileNotFoundError.
        try:
            os.remove(meta_file_name)
        except OSError:
            pass
def login_handle(args):
    """Log in and save the session cookies to ``args.cookies_file`` as JSON.

    Uses ``args.username`` and ``args.password`` for the login call, then calls
    ``get_user_info()`` (presumably to fetch/display the account details --
    confirm against the ``Bilibili`` class) before writing the cookies.
    """
    bilibili = Bilibili()
    bilibili.login(username=args.username, password=args.password)
    bilibili.get_user_info()
    with open(args.cookies_file, "w", encoding="utf-8") as f:
        json.dump(bilibili.get_cookies(), f, ensure_ascii=False, indent=2)
def info_handle(args):
    """Log a summary of the metadata referenced by ``args.meta``.

    ``args.meta`` is passed to ``fetch_meta``. On success the file name, size,
    SHA-1, upload time, block count and every block's size and URL are logged;
    otherwise a parse-error message is logged.
    """
    meta_data = fetch_meta(args.meta)
    if not meta_data:
        log("元数据解析出错")
        return
    log(f"文件名: {meta_data['filename']}")
    log(f"大小: {meta_data['size'] / 1024 / 1024:.2f} MB")
    log(f"SHA-1: {meta_data['sha1']}")
    log(f"上传时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_data['time']))}")
    log(f"分块数: {len(meta_data['block'])}")
    for index, block in enumerate(meta_data['block']):
        log(f"分块{index} ({block['size'] / 1024 / 1024:.2f} MB) URL: {block['url']}")
def upload_handle(args):
    """Upload ``args.file`` as a series of PNG-encoded blocks plus a metadata image.

    Reads ``args.cookies_file`` (JSON cookies), ``args.file``,
    ``args.block_size`` (in MB) and ``args.thread`` (maximum number of
    concurrently running block workers).

    Returns:
        * the metadata image URL when everything was uploaded,
        * the local metadata file name when only the metadata upload failed
          (the file is kept so it can be used later),
        * ``None`` when the cookies cannot be loaded or a block upload failed.
    """

    def skippable(block_sha1):
        """Return the album URL for *block_sha1* if a HEAD request answers 200, else None."""
        url = f"http://i0.hdslb.com/bfs/album/{block_sha1}.png"
        response = requests.head(url)
        return url if response.status_code == 200 else None

    def core(index, block):
        """Worker: encode one block as a PNG and upload it unless it is skippable."""
        block_file_name = f"{sha1}_{index}.png"
        try:
            image_dump(block, block_file_name)
            block_sha1 = calc_sha1(read_in_chunks(block_file_name), hexdigest=True)
            block_size = os.path.getsize(block_file_name)
            url = skippable(block_sha1)
            if url:
                log(f"分块{index} ({block_size / 1024 / 1024:.2f} MB) 已存在于服务器")
            else:
                for _ in range(3):
                    response = image_upload(block_file_name, cookies)
                    if response['code'] == 0:
                        url = response['data']['image_url']
                        log(f"分块{index} ({block_size / 1024 / 1024:.2f} MB) 已上传")
                        break
                    if response['code'] == -4:
                        # Not logged in: retrying cannot help, abort the whole upload.
                        terminate_flag.set()
                        log("上传失败, 请先登录")
                        return
                else:
                    # All three attempts failed.
                    terminate_flag.set()
                    log(f"分块{index} ({block_size / 1024 / 1024:.2f} MB) 上传失败, 服务器返回{response}")
                    return
            block_dict[index] = {
                'url': url,
                'size': block_size,
                'sha1': block_sha1,
            }
        except Exception:
            # An unexpected error must abort the upload as well; otherwise the
            # metadata would be built from an incomplete block table.
            terminate_flag.set()
            raise
        finally:
            # Always free the slot, on success and on failure alike. The
            # dispatcher waits on this semaphore and would block forever if a
            # failed worker never released it.
            done_flag.release()
            if os.path.exists(block_file_name):
                os.remove(block_file_name)

    done_flag = threading.Semaphore(0)
    terminate_flag = threading.Event()
    thread_pool = []
    block_dict = {}
    start_time = time.time()
    try:
        with open(args.cookies_file, "r", encoding="utf-8") as f:
            cookies = json.loads(f.read())
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON (JSONDecodeError is a ValueError).
        log("Cookies加载失败, 请先登录")
        return None
    file_name = args.file
    # With fewer than one worker the dispatcher would wait on the semaphore
    # before any thread exists.
    max_threads = max(1, args.thread)
    log(f"上传: {file_name} ({os.path.getsize(file_name) / 1024 / 1024:.2f} MB)")
    sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True)
    log(f"SHA-1: {sha1}")
    log(f"线程数: {args.thread}")
    # One byte below the nominal block size; presumably this leaves room for
    # the terminator byte that image_dump appends -- confirm.
    chunk_size = args.block_size * 1024 * 1024 - 1
    for index, block in enumerate(read_in_chunks(file_name, chunk_size=chunk_size)):
        if len(thread_pool) >= max_threads:
            # Wait until one running worker has finished.
            done_flag.acquire()
        if terminate_flag.is_set():
            log("已终止上传, 等待线程回收")
            # Stop dispatching: no further worker will release the semaphore,
            # so another acquire() would never return.
            break
        worker = threading.Thread(target=core, args=(index, block))
        thread_pool.append(worker)
        worker.start()
    for worker in thread_pool:
        worker.join()
    if terminate_flag.is_set():
        return None
    meta_data = {
        'time': int(time.time()),
        'filename': file_name,
        'size': os.path.getsize(file_name),
        'sha1': sha1,
        'block': [block_dict[i] for i in range(len(block_dict))],
    }
    meta_file_name = f"{sha1}_meta.png"
    image_dump(json.dumps(meta_data, ensure_ascii=False).encode("utf-8"), meta_file_name)
    for _ in range(3):
        response = image_upload(meta_file_name, cookies)
        if response['code'] == 0:
            url = response['data']['image_url']
            log("元数据已上传")
            os.remove(meta_file_name)
            # len(block_dict) instead of the loop variable: an empty file
            # produces no blocks and would leave ``index`` undefined.
            log(f"{file_name}上传完毕, 共有{len(block_dict)}个分块, 耗时{int(time.time() - start_time)}秒")
            # Show only the 40-hex-digit id when the URL has the expected album form.
            if re.match(r'^http(s?)://i0.hdslb.com/bfs/album/[a-fA-F0-9]{40}.png$', url):
                meta_id = re.findall(r'[a-fA-F0-9]{40}', url)[0]
            else:
                meta_id = url
            log(f"META: {meta_id}")
            return url
    log(f"元数据上传失败, 保留文件{meta_file_name}, 服务器返回{response}")
    return meta_file_name
def download_handle(args):
    """Download the file described by the metadata in ``args.meta``.

    Reads ``args.meta`` (passed to ``fetch_meta``), ``args.file`` (optional
    output name; defaults to the name stored in the metadata) and
    ``args.thread`` (maximum number of concurrently running block workers).

    Block images that already exist locally with a matching SHA-1 are reused,
    so an interrupted download can be resumed.

    Returns:
        The output file name when the file was assembled and its SHA-1 matches
        the metadata. ``None`` when the metadata cannot be parsed, a block
        fails verification, or the final checksum does not match. ``None`` is
        also returned (as in the original code) when a verified copy of the
        file already exists locally.
    """

    def core(index, block):
        """Worker: make sure block *index* is present locally and matches its SHA-1."""
        block_file_name = f"{meta_data['sha1']}_{index}.png"
        try:
            if os.path.exists(block_file_name) and calc_sha1(read_in_chunks(block_file_name), hexdigest=True) == block['sha1']:
                log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 已存在于本地")
                block_file_name_dict[index] = block_file_name
                return
            for _ in range(3):
                image_download(block['url'], file_name=block_file_name)
                if calc_sha1(read_in_chunks(block_file_name), hexdigest=True) == block['sha1']:
                    log(f"分块{index} ({os.path.getsize(block_file_name) / 1024 / 1024:.2f} MB) 已下载")
                    block_file_name_dict[index] = block_file_name
                    return
            # All three attempts produced a file with the wrong checksum.
            terminate_flag.set()
            log(f"分块{index}校验未通过, SHA-1与元数据中的记录{block['sha1']}不匹配")
            if os.path.exists(block_file_name):
                os.remove(block_file_name)
        except Exception:
            # An unexpected error must abort the download as well; otherwise
            # assembly would run with a missing block.
            terminate_flag.set()
            raise
        finally:
            # Always free the slot, on success and on failure alike. The
            # dispatcher waits on this semaphore and would block forever if a
            # failed worker never released it.
            done_flag.release()

    done_flag = threading.Semaphore(0)
    terminate_flag = threading.Event()
    thread_pool = []
    block_file_name_dict = {}
    start_time = time.time()
    meta_data = fetch_meta(args.meta)
    if not meta_data:
        log("元数据解析出错")
        return None
    # The stored name comes from remote metadata; keep only its final path
    # component so it cannot direct the write outside the current directory.
    file_name = args.file if args.file else os.path.basename(meta_data['filename'])
    log(f"下载: {file_name} ({meta_data['size'] / 1024 / 1024:.2f} MB), 共有{len(meta_data['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_data['time']))}")
    log(f"线程数: {args.thread}")
    if os.path.exists(file_name) and calc_sha1(read_in_chunks(file_name), hexdigest=True) == meta_data['sha1']:
        log(f"{file_name}已存在于本地")
        return None
    # With fewer than one worker the dispatcher would wait on the semaphore
    # before any thread exists.
    max_threads = max(1, args.thread)
    for index, block in enumerate(meta_data['block']):
        if len(thread_pool) >= max_threads:
            # Wait until one running worker has finished.
            done_flag.acquire()
        if terminate_flag.is_set():
            log("已终止下载, 等待线程回收")
            # Stop dispatching: no further worker will release the semaphore,
            # so another acquire() would never return.
            break
        worker = threading.Thread(target=core, args=(index, block))
        thread_pool.append(worker)
        worker.start()
    for worker in thread_pool:
        worker.join()
    if terminate_flag.is_set():
        return None
    # Concatenate the decoded blocks in index order, deleting each block image
    # once its payload has been written.
    with open(file_name, "wb") as f:
        for index in range(len(meta_data['block'])):
            block_file_name = block_file_name_dict[index]
            f.write(image_load(block_file_name))
            os.remove(block_file_name)
    sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True)
    log(f"SHA-1: {sha1}")
    if sha1 != meta_data['sha1']:
        log(f"{file_name}校验未通过, SHA-1与元数据中的记录{meta_data['sha1']}不匹配")
        return None
    log(f"{file_name}校验通过")
    log(f"{file_name}下载完毕, 耗时{int(time.time() - start_time)}秒")
    return file_name
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(prog="BiliDrive", description="Bilibili Drive", epilog="By Hsury, 2019/10/23") parser = argparse.ArgumentParser(prog="BiliDrive", description="Bilibili Drive", epilog="By Hsury, 2019/10/24")
parser.add_argument("-c", "--cookies-file", default="cookies.json", help="cookies json file name") parser.add_argument("-c", "--cookies-file", default="cookies.json", help="cookies json file name")
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()
...@@ -224,17 +296,19 @@ if __name__ == "__main__": ...@@ -224,17 +296,19 @@ if __name__ == "__main__":
login_parser.set_defaults(func=login_handle) login_parser.set_defaults(func=login_handle)
info_parser = subparsers.add_parser("info", help="get meta info") info_parser = subparsers.add_parser("info", help="get meta info")
info_parser.add_argument("url", help="meta url") info_parser.add_argument("meta", help="meta url")
info_parser.set_defaults(func=info_handle) info_parser.set_defaults(func=info_handle)
upload_parser = subparsers.add_parser("upload", help="upload a file") upload_parser = subparsers.add_parser("upload", help="upload a file")
upload_parser.add_argument("file", help="file name") upload_parser.add_argument("file", help="file name")
upload_parser.add_argument("-b", "--block-size", default=1, type=int, help="block size in MB") upload_parser.add_argument("-b", "--block-size", default=4, type=int, help="block size in MB")
upload_parser.add_argument("-t", "--thread", default=2, type=int, help="thread number")
upload_parser.set_defaults(func=upload_handle) upload_parser.set_defaults(func=upload_handle)
download_parser = subparsers.add_parser("download", help="download a file") download_parser = subparsers.add_parser("download", help="download a file")
download_parser.add_argument("url", help="meta url") download_parser.add_argument("meta", help="meta url")
download_parser.add_argument("save_as", nargs="?", default="", help="save as file name") download_parser.add_argument("file", nargs="?", default="", help="save as file name")
download_parser.add_argument("-t", "--thread", default=4, type=int, help="thread number")
download_parser.set_defaults(func=download_handle) download_parser.set_defaults(func=download_handle)
args = parser.parse_args() args = parser.parse_args()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册