util.py 4.3 KB
Newer Older
W
wizardforcel 已提交
1 2 3 4 5 6 7 8 9 10
# -*- coding: utf-8 -*-

import os
import sys
from os import path
import hashlib
import types
import requests
import json
import time
W
wizardforcel 已提交
11
import tempfile
W
wizardforcel 已提交
12

W
wizardforcel 已提交
13
# Directory (the system temp dir) in which this module keeps its
# "history.json" and "cookies.json" files.
bundle_dir = tempfile.gettempdir()
W
wizardforcel 已提交
14 15 16

def size_string(byte):
    """Format a byte count as a short human-readable string.

    Values strictly above 1024**3 are shown in GB, strictly above 1024**2
    in MB, strictly above 1024 in KB (all with two decimals); anything else
    is truncated to an integer number of bytes.
    """
    if byte > 1024 * 1024 * 1024:
        return f"{byte / 1024 / 1024 / 1024:.2f} GB"
    if byte > 1024 * 1024:
        return f"{byte / 1024 / 1024:.2f} MB"
    if byte > 1024:
        return f"{byte / 1024:.2f} KB"
    return f"{int(byte)} B"

W
wizardforcel 已提交
17 18
def calc_hash(data, algo, hex=True):
    """Hash ``data`` with the named ``hashlib`` algorithm.

    Args:
        data: Either one bytes-like object (``bytes``, ``bytearray`` or
            ``memoryview``), hashed in a single call, or an iterable of
            bytes-like chunks that are fed to the hasher in order.
        algo: Name of a constructor in ``hashlib``, e.g. ``'sha1'``.
        hex: If true, return the hexadecimal digest string; otherwise
            return the raw digest bytes.

    Returns:
        ``str`` when ``hex`` is true, else ``bytes``.

    Raises:
        AttributeError: If ``hashlib`` has no attribute named ``algo``.
    """
    hasher = getattr(hashlib, algo)()
    # bytearray/memoryview (and bytes subclasses) are iterable as well, but
    # iterating them yields ints, which hasher.update() rejects. Treat every
    # bytes-like object as a single chunk.
    is_single_chunk = isinstance(data, (bytes, bytearray, memoryview)) \
        or not hasattr(data, '__iter__')
    if is_single_chunk:
        hasher.update(data)
    else:
        for chunk in data:
            hasher.update(chunk)
    return hasher.hexdigest() if hex else hasher.digest()
W
wizardforcel 已提交
26
    
W
wizardforcel 已提交
27 28
def calc_sha1(data, hex=True):
    """Return the SHA-1 digest of ``data`` by delegating to ``calc_hash``."""
    return calc_hash(data, 'sha1', hex)


def calc_md5(data, hex=True):
    """Return the MD5 digest of ``data`` by delegating to ``calc_hash``."""
    return calc_hash(data, 'md5', hex)
W
wizardforcel 已提交
29 30 31 32 33 34 35 36
    
def image_download(url):
    """Download ``url`` and return the response body, or ``None`` on failure.

    The body is streamed in 128 KiB chunks. The download is abandoned (and
    ``None`` returned) when more than 5 seconds elapse between two
    consecutive chunks, or when the request or the stream raises.

    Args:
        url: Address to fetch with a GET request.

    Returns:
        The complete body as ``bytes``, or ``None`` if the download failed
        or stalled.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36",
    }
    content = []
    last_chunk_time = None
    r = None
    try:
        r = request_retry('GET', url, headers=headers, stream=True)
        for chunk in r.iter_content(128 * 1024):
            # Stall detection: give up if the gap since the previous chunk
            # exceeded 5 seconds.
            if last_chunk_time is not None and \
               time.time() - last_chunk_time > 5:
                return None
            content.append(chunk)
            last_chunk_time = time.time()
    except Exception:
        # Best-effort download: any error is reported to the caller as None.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
    finally:
        # Close the streamed response on every exit path, including the
        # early returns above.
        if r is not None:
            r.close()
    return b"".join(content)
W
wizardforcel 已提交
47 48
    

W
wizardforcel 已提交
49 50 51 52 53 54 55 56 57 58
def read_history(site=None):
    """Load the history stored in ``history.json`` under ``bundle_dir``.

    Args:
        site: Optional site key. When falsy the whole history mapping is
            returned; otherwise only the entry for that site.

    Returns:
        The parsed JSON content (or the ``site`` entry of it). An empty dict
        is returned when the file does not exist or the site has no entry.
    """
    history_file = path.join(bundle_dir, "history.json")
    if not path.exists(history_file):
        return {}
    with open(history_file, encoding="utf-8") as fp:
        records = json.load(fp)
    return records.get(site, {}) if site else records
W
wizardforcel 已提交
59

W
wizardforcel 已提交
60
def write_history(first_4mb_sha1, meta_dict, site, url):
    """Record one entry in ``history.json`` under ``bundle_dir``.

    The existing history is read, the entry for ``site`` /
    ``first_4mb_sha1`` is replaced, and the whole file is rewritten as
    indented UTF-8 JSON.

    Args:
        first_4mb_sha1: Key under which the entry is stored for the site.
        meta_dict: Metadata mapping to store. It is copied, so the caller's
            dict is left unmodified.
        site: Site key that groups the entries.
        url: Value stored under the ``'url'`` key of the saved entry.
    """
    history = read_history()
    site_history = history.setdefault(site, {})
    # Store a copy with the URL added instead of writing 'url' into the
    # caller's dict.
    site_history[first_4mb_sha1] = {**meta_dict, 'url': url}
    with open(path.join(bundle_dir, "history.json"), "w", encoding="utf-8") as f:
        json.dump(history, f, ensure_ascii=False, indent=2)
    
W
wizardforcel 已提交
68 69 70
def read_in_chunk(fname, size=4 * 1024 * 1024, cnt=-1):
    """Yield the contents of a file as successive binary chunks.

    Args:
        fname: Path of the file to read (opened in binary mode).
        size: Maximum number of bytes per chunk; defaults to 4 MiB.
        cnt: Maximum number of chunks to yield; ``-1`` means no limit.

    Yields:
        ``bytes`` objects of at most ``size`` bytes, in file order, until
        the file is exhausted or ``cnt`` chunks have been produced.
    """
    with open(fname, "rb") as f:
        idx = 0
        # Check the chunk limit before reading so that no chunk is read
        # from disk only to be thrown away once the limit is reached.
        while cnt == -1 or idx < cnt:
            data = f.read(size)
            if not data:
                break
            yield data
            idx += 1
W
wizardforcel 已提交
77 78
                
def log(message):
    """Print ``message`` prefixed with the local time as ``[YYYY-MM-DD HH:MM:SS]``."""
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {message}")
    
W
wizardforcel 已提交
81
def request_retry(method, url, retry=10, **kwargs):
    """Issue an HTTP request, retrying when it raises.

    Args:
        method: HTTP method name passed to ``requests.request``.
        url: Target URL.
        retry: Maximum number of attempts. Values below 1 are treated as 1
            so that a request is always made.
        **kwargs: Forwarded to ``requests.request``. ``timeout`` defaults to
            10 when not supplied.

    Returns:
        The response object returned by the first successful attempt.

    Raises:
        Exception: Whatever the final attempt raised, re-raised unchanged.
    """
    kwargs.setdefault('timeout', 10)
    # Always make at least one attempt; otherwise the loop would be skipped
    # and the function would return None without requesting anything.
    attempts = max(retry, 1)
    for i in range(attempts):
        try:
            return requests.request(method, url, **kwargs)
        except Exception:
            if i == attempts - 1:
                raise
            
W
wizardforcel 已提交
89 90
def get_retry(url, retry=10, **kwargs):
    """Send a GET request through ``request_retry``."""
    return request_retry('GET', url, retry, **kwargs)


def post_retry(url, retry=10, **kwargs):
    """Send a POST request through ``request_retry``."""
    return request_retry('POST', url, retry, **kwargs)
W
wizardforcel 已提交
91 92 93 94 95 96 97 98 99

def print_meta(meta_dict):
    """Print a summary of a file's metadata to stdout.

    Reads the ``filename``, ``size``, ``sha1``, ``time`` and ``block`` keys
    of ``meta_dict``; each item of ``block`` must provide ``size`` and
    ``url``. ``time`` is formatted as local time.
    """
    print(f"文件名: {meta_dict['filename']}")

    total_size = size_string(meta_dict['size'])
    print(f"大小: {total_size}")

    print(f"SHA-1: {meta_dict['sha1']}")

    uploaded_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))
    print(f"上传时间: {uploaded_at}")

    print(f"分块数: {len(meta_dict['block'])}")

    # Blocks are numbered from 1 in the output.
    for number, block in enumerate(meta_dict['block'], start=1):
        print(f"分块{number} ({size_string(block['size'])}) URL: {block['url']}")
W
wizardforcel 已提交
100 101 102 103 104 105
        
def block_offset(meta_dict, i):
    """Return the summed ``size`` of the first ``i`` entries of ``meta_dict['block']``.

    This is the byte offset at which block ``i`` starts; it is 0 when ``i``
    is 0 or negative.
    """
    offset = 0
    for index in range(i):
        offset += meta_dict['block'][index]['size']
    return offset
    
def ask_overwrite():
    """Ask on stdin whether an existing file should be overwritten.

    Returns:
        ``True`` only when the answer is exactly ``"y"`` or ``"Y"``; any
        other input (including an empty line) yields ``False``.
    """
    answer = input("文件已存在, 是否覆盖? [y/N] ")
    return answer == "y" or answer == "Y"
W
wizardforcel 已提交
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
    
def load_cookies(site=None):
    """Load the cookies stored in ``cookies.json`` under ``bundle_dir``.

    Args:
        site: Optional site key. When falsy the whole mapping is returned;
            otherwise only the entry for that site.

    Returns:
        The parsed JSON content (or the ``site`` entry of it). An empty dict
        is returned when the file does not exist or the site has no entry.
    """
    cookie_file = path.join(bundle_dir, "cookies.json")
    if not path.exists(cookie_file):
        return {}
    with open(cookie_file, encoding="utf-8") as fp:
        stored = json.load(fp)
    return stored.get(site, {}) if site else stored

def save_cookies(site, cookies):
    """Store ``cookies`` for ``site`` in ``cookies.json`` under ``bundle_dir``.

    The existing file content is loaded, the entry for ``site`` is replaced,
    and the whole mapping is written back as JSON.

    Args:
        site: Site key under which the cookies are stored.
        cookies: JSON-serialisable cookie data for that site.
    """
    fname = path.join(bundle_dir, "cookies.json")
    full_cookies = load_cookies()
    full_cookies[site] = cookies
    with open(fname, "w", encoding="utf-8") as f:
        json.dump(full_cookies, f)
        
def parse_cookies(cookie_str):
    """Parse a ``Cookie`` header style string into a dict.

    Pairs are separated by ``;`` (surrounding whitespace is ignored) and
    each pair is split at its first ``=`` only, so values that themselves
    contain ``=`` (for example base64 padding) are kept intact. Segments
    without any ``=`` are skipped.

    Args:
        cookie_str: Text such as ``"a=1; b=2"``.

    Returns:
        Mapping of cookie name to cookie value; a later duplicate name
        overrides an earlier one.
    """
    cookies = {}
    for pair in cookie_str.split(';'):
        name, sep, value = pair.strip().partition('=')
        if not sep:
            # No '=' in this segment (empty piece or bare flag): not a pair.
            continue
        cookies[name] = value
    return cookies