更新douyin/douyin.py, douyin/douyin_download_ui.py, douyin/tools.py, douyin/抖音短视频合集批量下载器v0.2.exe

f946b950 · 小小明-代码实体 · b1e51542 · f946b950 · f946b950 · f946b950
4 changed files
--- a/douyin/douyin.py
+++ b/douyin/douyin.py
+import json
+import os
+import re
+import time
+from urllib.parse import unquote, urljoin
+
+import requests
+
+from tools import fetch_host_cookie
+
+
+def get_video_url(url, cookies):
+    """Fetch a Douyin page and extract its playback URL and collection info.
+
+    :param url: page URL; URLs containing ``collection`` are read from a
+        different key of the embedded page state than plain video URLs
+    :param cookies: cookie dict sent with the request
+    :return: ``(video_url, title, collection_urls, collection_title)``, or
+        ``None`` (after printing the reason) when the request does not return
+        HTTP 200 or the page carries no usable video data.
+        ``collection_urls`` is sorted by the trailing number of each URL;
+        ``collection_title`` is ``""`` when the page has no ``<h2>`` heading.
+    """
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36",
+        "referer": "https://www.douyin.com"
+    }
+    # A timeout keeps a stalled connection from blocking the caller forever.
+    res = requests.get(url, headers=headers, cookies=cookies, timeout=30)
+    if res.status_code != 200:
+        print('视频链接请求失败！！！')
+        return None
+
+    # The page state is URL-quoted JSON inside the RENDER_DATA script tag.
+    render_data = re.search(
+        r'<script id="RENDER_DATA" type="application/json">([^<>]+)</script>', res.text)
+    if render_data is None:
+        print("视频无效...")
+        return None
+    data = json.loads(unquote(render_data.group(1)))
+    key = '8' if "collection" in url else '34'
+    if key not in data:
+        print("视频无效...")
+        return None
+
+    detail = data[key]['aweme']['detail']
+    title = detail['desc']
+    if not title:
+        # Fall back to the HTML <title> when the video has no description.
+        title_match = re.search(r"<title[^>]+>\s*([^>]+)\s*</title>", res.text)
+        title = title_match.group(1) if title_match else ""
+    video_url = urljoin(url, detail['video']['playApi'])
+
+    # De-duplicate the collection links found in the page, then order them by
+    # the number at the end of each link.
+    collection_paths = set(re.findall(r"//www\.douyin\.com/collection/\d+/\d+", res.text))
+    collection_urls = sorted(
+        (urljoin("https://www.douyin.com", path) for path in collection_paths),
+        key=lambda s: int(s[s.rfind("/") + 1:]))
+
+    heading = re.search(r"<h2 [^>]+>([^<>]+)</h2>", res.text)
+    collection_title = heading.group(1) if heading else ""
+    return video_url, title, collection_urls, collection_title
+
+
+def format_filename(title):
+    """Return *title* with file-name-unsafe characters and whitespace removed.
+
+    Removed characters: slash, backslash, colon, asterisk, question mark,
+    angle brackets, pipe, double quote, and every whitespace character.
+    """
+    # Raw string: the pattern previously relied on invalid escape sequences
+    # in a normal string literal, which newer Python versions warn about.
+    return re.sub(r'[/\\:*?<>|"\s]', "", title)
+
+
+def download_video(video_url, title, folder):
+    """Stream a video to ``<folder>/<sanitized title>.mp4``, yielding progress.
+
+    This is a generator: after every chunk written it yields
+    ``(done_size, cost_time, total)`` where ``done_size`` is the number of
+    bytes written so far, ``cost_time`` the seconds elapsed since the download
+    started and ``total`` the size announced by the ``content-length`` header.
+    Nothing is yielded when the target file already exists.
+
+    :param video_url: direct URL of the video stream
+    :param title: video title; sanitized with ``format_filename``
+    :param folder: existing directory to save into
+    :raises requests.HTTPError: when the server answers with an error status
+    :raises KeyError: when the response has no ``content-length`` header
+    """
+    title = format_filename(title)
+    basename = f"{title}.mp4"
+    filename = f"{folder}/{title}.mp4"
+    # Check first so that skipping an existing file costs no network request.
+    if os.path.exists(filename):
+        print(basename, "已存在，跳过...")
+        return
+    # Download under a temporary name: an interrupted transfer must not leave
+    # a truncated file that a later run would skip as "already exists".
+    part_name = filename + ".part"
+    chunk_size = 1024 * 1024
+    start_time = time.time()
+    # The context manager releases the streamed connection even on errors.
+    with requests.get(url=video_url, stream=True) as res:
+        # Do not save an HTTP error body as if it were the video.
+        res.raise_for_status()
+        done_size, total = 0, int(res.headers['content-length'])
+        file_size = round(total / 1024 / 1024, 2)
+        print("-----------------------------------")
+        print(f'开始下载文件：{basename}\n当前文件大小：{file_size}MB')
+        with open(part_name, 'wb') as f:
+            for chunk in res.iter_content(chunk_size):
+                f.write(chunk)
+                done_size += len(chunk)
+                cost_time = time.time() - start_time
+                yield done_size, cost_time, total
+    os.replace(part_name, filename)
+    cost_time = time.time() - start_time
+    print(f'文件：{basename} 下载完成！耗时：{cost_time:0.2f} 秒')
+
+
+# def download_douyin_collection(collection_urls, collection_title, save_dir="."):
+#     cookies = fetch_host_cookie("douyin.com")
+#     if not cookies:
+#         print("从谷歌游览器提取的douyin cookie为空，请确认本地80以上版本的谷歌游览器有访问过抖音主页")
+#         return
+#     n = len(collection_urls)
+#     print(f"该合集共解析出{n}个视频，合集名称：{collection_title}")
+#     os.makedirs(collection_title, exist_ok=True)
+#     for i, collection_url in enumerate(collection_urls, 1):
+#         num = int(collection_url[collection_url.rfind("/") + 1:])
+#         video_url, title, _, _ = get_video_url(collection_url, cookies)
+#         for done_size, cost_time, total in download_video(video_url, f"{num:>3d}. {title}", save_dir):
+#             yield i, n, done_size, cost_time, total
+#
+#
+# def download_douyin_video(video_url, title, save_dir="."):
+#     for done_size, cost_time, total in download_video(video_url, title, save_dir):
+#         yield 1, 1, done_size, cost_time, total
+
+
+def download_douyin(url, download_collection=False, save_dir="."):
+    """Download a Douyin video, or the whole collection it belongs to.
+
+    This is a generator; nothing happens until it is iterated. It yields
+    ``(index, count, done_size, cost_time, total)`` for every chunk written,
+    where ``index``/``count`` identify the current video (``1, 1`` for a
+    single video) and the remaining values come from ``download_video``.
+    It finishes without yielding when no cookies are available or the page
+    cannot be parsed.
+
+    :param url: Douyin video page URL
+    :param download_collection: download every video of the collection when
+        the video belongs to one
+    :param save_dir: directory to store the files in; collection downloads go
+        into a sub-directory named after the collection
+    :return: generator of progress tuples
+    """
+    cookies = fetch_host_cookie("douyin.com")
+    if not cookies:
+        print("从谷歌游览器提取的douyin cookie为空，请确认本地80以上版本的谷歌游览器有访问过抖音主页")
+        return
+    info = get_video_url(url, cookies)
+    # get_video_url prints the reason and returns None when the page is unusable.
+    if info is None:
+        return
+    video_url, title, collection_urls, collection_title = info
+    if download_collection and collection_urls:
+        n = len(collection_urls)
+        print(f"该合集共解析出{n}个视频，合集名称：{collection_title}")
+        # Sanitize the collection name (it becomes a directory name) and let
+        # os.path.join choose the separator instead of hard-coding a backslash.
+        save_dir = os.path.join(save_dir, format_filename(collection_title))
+        os.makedirs(save_dir, exist_ok=True)
+        for i, collection_url in enumerate(collection_urls, 1):
+            num = int(collection_url[collection_url.rfind("/") + 1:])
+            info = get_video_url(collection_url, cookies)
+            if info is None:
+                # Skip an entry that cannot be parsed rather than abort the batch.
+                continue
+            video_url, title = info[0], info[1]
+            for done_size, cost_time, total in download_video(video_url, f"{num:>3d}. {title}", save_dir):
+                yield i, n, done_size, cost_time, total
+    else:
+        if collection_urls:
+            print(f"当前视频属于合集视频，合集名称：{collection_title}")
+        for done_size, cost_time, total in download_video(video_url, title, save_dir):
+            yield 1, 1, done_size, cost_time, total
+    print("下载完成！存储路径为", save_dir)
+
+
+def parse_vdouyin_url(url):
+    """Turn a short link into ``https://www.douyin.com/video/<id>``.
+
+    Sends a HEAD request to *url* and takes the video id from the
+    ``Location`` header of the response.
+
+    :raises KeyError: when the response has no ``Location`` header
+    :raises ValueError: unless the header contains exactly one
+        ``/video/<digits>`` segment
+    """
+    response = requests.head(url)
+    redirect_target = response.headers['Location']
+    (video_id,) = re.findall(r"/video/(\d+)", redirect_target)
+    return "https://www.douyin.com/video/" + video_id
+
+
+if __name__ == "__main__":
+    # download_douyin is a generator function: calling it only creates the
+    # generator, so it must be drained for the download to actually run.
+    for _ in download_douyin('https://www.douyin.com/video/7054896959680711966'):
+        pass
+    # fire.Fire(download_douyin)
--- a/douyin/douyin_download_ui.py
+++ b/douyin/douyin_download_ui.py
+"""
+小小明的代码
+CSDN主页：https://blog.csdn.net/as604049322
+"""
+__author__ = '小小明'
+__time__ = '2022/1/23'
+
+import os
+import re
+import sys
+
+import PySimpleGUI as sg
+import requests
+
+from douyin import get_video_url, download_douyin, parse_vdouyin_url
+from tools import get_chrome_path
+
+import subprocess as sub
+
+sg.change_look_and_feel("Python")
+# Window layout: one inner list per row of widgets. The `key` values are the
+# names used to look widgets up and to read their values in the event loop.
+layout = [
+    # Row 1: video URL input.
+    [sg.Text('抖音视频地址：', font=("楷体", 12)),
+     sg.In(key='url', size=(70, 1), text_color="#bb8b59",
+           default_text="https://www.douyin.com/video/6803929443069988103")],
+    # Row 2: collection switch and action buttons (button text is the event name).
+    [sg.Checkbox('如果是一个合集则下载整个合集', key="download_collection", default=False),
+     sg.Button('开始下载'),
+     sg.Button('清空输出'),
+     sg.Button('访问抖音主页'),
+     sg.Button('访问当前地址'),
+     ],
+    # Row 3: output pane.
+    [sg.Output(size=(85, 10), key="out", text_color="#15d36a")],
+    # Row 4: two progress bars (both scaled 0..1000), each followed by a status text.
+    [
+        sg.ProgressBar(1000, size=(20, 20), key='video_bar', bar_color=("#bb8b59", "#295273")),
+        sg.Text('000.0MB,00/00\n00:00<00:00', key="message_video"),
+        sg.ProgressBar(1000, size=(20, 20), key='progressbar', bar_color=("#15d36a", "#295273")),
+        sg.Text('00.00MB/00.00MB\n00.00MB/s', key="message")
+    ],
+    # Row 5: output directory selection and related options.
+    [sg.Text('输出目录：', font=("楷体", 12)),
+     sg.In(size=(35, 1), key="save_dir"),
+     sg.FolderBrowse('...', target='save_dir', initial_folder="."),
+     sg.Checkbox(' 下载完毕后 \n打开所在目录', key="open_folder", default=True),
+     sg.Button('打开输出目录'),
+     ],
+    # Row 6: author credit.
+    [sg.Text("@小小明：https://blog.csdn.net/as604049322"), ],
+]
+
+
+def resource_path(relative_path):
+    """Resolve *relative_path* against the application's resource directory.
+
+    The directory is ``sys._MEIPASS`` when that attribute exists (presumably
+    set by the packaging tool for the bundled executable; confirm), otherwise
+    the directory containing this source file.
+    """
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    root = getattr(sys, '_MEIPASS', script_dir)
+    return os.path.join(root, relative_path)
+
+
+# Create the main window; the icon is resolved through resource_path.
+window = sg.Window('抖音短视频下载器v0.2 By 小小明', layout, icon=resource_path("./douyin.ico"))
+# Finalize before the first read() so the print and the widget update below
+# already have a live window to act on.
+window.finalize()
+print("注意：程序依赖80以上版本的谷歌游览器！仅支持https://v.douyin.com/和https://www.douyin.com/video/开头的抖音视频下载。")
+# Pre-fill the output directory field with the current working directory.
+window["save_dir"].Update(os.path.abspath("."))
+
+def _format_mmss(seconds):
+    """Format a duration given in seconds as zero-padded ``MM:SS``."""
+    minutes, secs = divmod(int(seconds), 60)
+    return f"{minutes:02d}:{secs:02d}"
+
+
+def _open_in_explorer(path):
+    """Open *path* in Windows Explorer."""
+    # An argument list (not a command string) keeps paths with spaces intact.
+    sub.Popen(["explorer", path], shell=False)
+
+
+def _open_in_chrome(target):
+    """Open *target* in Chrome; print a hint when Chrome cannot be located."""
+    chrome_path = get_chrome_path()
+    if chrome_path is None:
+        print("未获取到注册表项HKEY_CLASSES_ROOT\\ChromeHTML\\Application，请检查本地是否已经安装谷歌游览器！！！")
+        return
+    args = [chrome_path]
+    if target:
+        # Passed as its own argument so user-typed text cannot be split into
+        # extra command-line switches.
+        args.append(target)
+    sub.Popen(args, shell=False)
+
+
+def _run_download(url, download_collection, save_dir):
+    """Run download_douyin and mirror its progress into the window widgets."""
+    total_cost_time, total_done_size, calc_cost_time = 0, 0, 0
+    for i, n, done_size, cost_time, total in download_douyin(url, download_collection, save_dir):
+        window["video_bar"].Update(i * 1000 // n)
+        # Time spent on finished videos plus the time spent on the current one.
+        elapsed = total_cost_time + cost_time
+        if done_size == total:
+            # Current video finished: fold it into the running totals and
+            # extrapolate the total duration from the videos done so far.
+            total_cost_time = elapsed
+            calc_cost_time = total_cost_time * n // i
+            total_done_size += total
+        message_video = (f"{total_done_size / 1024 / 1024:.1f}MB,{i}/{n}\n"
+                         f"{_format_mmss(elapsed)}<{_format_mmss(calc_cost_time)}")
+        window["message_video"].Update(message_video)
+        # Guard both divisions: an empty file or a coarse clock would
+        # otherwise raise ZeroDivisionError.
+        window["progressbar"].Update(done_size * 1000 // total if total else 0)
+        speed = done_size / cost_time / 1024 / 1024 if cost_time > 0 else 0.0
+        message = f"{done_size / 1024 / 1024:.2f}MB/{total / 1024 / 1024:.2f}MB\n{speed:.2f}MB/s"
+        window["message"].Update(message)
+        # The event loop is blocked while downloading; repaint explicitly.
+        window.refresh()
+
+
+while True:
+    event, values = window.read()
+    if event is None:
+        # Window closed.
+        break
+    elif event == "开始下载":
+        url = values["url"].strip()
+        if url.startswith("https://v.douyin.com/"):
+            try:
+                url = parse_vdouyin_url(url)
+            except (requests.RequestException, KeyError, ValueError) as e:
+                print(f"短链接解析失败：{e}")
+                continue
+        if not url.startswith("https://www.douyin.com/video/"):
+            print("视频地址无效！！！仅支持https://www.douyin.com/video/开头的抖音地址")
+            continue
+        window["url"].Update(url)
+        if os.path.exists(values['save_dir']):
+            save_dir = os.path.abspath(values['save_dir'])
+        else:
+            # Fall back to the working directory and show it in the input.
+            save_dir = os.path.abspath(".")
+            window["save_dir"].Update(save_dir)
+        try:
+            _run_download(url, values["download_collection"], save_dir)
+        except Exception as e:
+            # Top-level GUI boundary: report the failure instead of letting a
+            # network or parsing error terminate the application.
+            print(f"下载失败：{e}")
+            continue
+        if values["open_folder"]:
+            _open_in_explorer(save_dir)
+    elif event == "清空输出":
+        window["out"].Update("")
+    elif event == "访问抖音主页":
+        _open_in_chrome("douyin.com")
+    elif event == "访问当前地址":
+        _open_in_chrome(values["url"].strip())
+    elif event == "打开输出目录":
+        if os.path.exists(values['save_dir']):
+            _open_in_explorer(os.path.abspath(values['save_dir']))
+        else:
+            sg.popup("输出目录不存在！")
+            window["save_dir"].Update(os.path.abspath("."))
+window.close()
--- a/douyin/tools.py
+++ b/douyin/tools.py
+"""
+小小明的代码
+CSDN主页：https://blog.csdn.net/as604049322
+"""
+__author__ = '小小明'
+__time__ = '2022/1/23'
+
+import base64
+import json
+import os
+import sqlite3
+import subprocess
+import winreg
+
+import win32crypt
+from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+
+
+def load_local_key(localStateFilePath):
+    """Return the AES-GCM key stored in Chrome's local-state JSON file.
+
+    Reads ``os_crypt.encrypted_key`` from the JSON file, base64-decodes it,
+    drops the first 5 bytes and decrypts the remainder with
+    ``win32crypt.CryptUnprotectData`` (DPAPI).
+
+    :param localStateFilePath: path of the JSON file holding the wrapped key
+    :return: the decrypted key bytes
+    """
+    with open(localStateFilePath, encoding='u8') as state_file:
+        local_state = json.load(state_file)
+    wrapped_key = base64.b64decode(local_state['os_crypt']['encrypted_key'])
+    # The first 5 bytes are a header, not key material.
+    protected_blob = wrapped_key[5:]
+    unprotected = win32crypt.CryptUnprotectData(protected_blob, None, None, None, 0)
+    return unprotected[1]
+
+
+def decrypt_value(key, data):
+    """Decrypt one AES-GCM encrypted value and return it as text.
+
+    *data* is sliced as: bytes 0-2 skipped, bytes 3-14 used as the nonce,
+    the rest passed to AES-GCM as the ciphertext. The plaintext is decoded
+    as UTF-8.
+
+    :param key: AES-GCM key bytes
+    :param data: encrypted value bytes
+    :return: decrypted text
+    """
+    nonce = data[3:15]
+    ciphertext = data[15:]
+    cipher = AESGCM(key)
+    return cipher.decrypt(nonce, ciphertext, None).decode('u8')
+
+
+def fetch_host_cookie(host):
+    """Return the decrypted Chrome cookies stored for *host*.
+
+    Looks in the default Chrome profile under ``%LOCALAPPDATA%`` and selects
+    cookies whose ``host_key`` ends with ``.<host>``.
+
+    :param host: domain name, e.g. ``"douyin.com"``
+    :return: dict mapping cookie name to decrypted cookie value
+    """
+    userDataDir = os.environ['LOCALAPPDATA'] + r'\Google\Chrome\User Data'
+    localStateFilePath = userDataDir + r'\Local State'
+    cookiepath = userDataDir + r'\Default\Cookies'
+    # Chrome 97 moved the Cookies database into the Network directory.
+    if not os.path.exists(cookiepath) or os.stat(cookiepath).st_size == 0:
+        cookiepath = userDataDir + r'\Default\Network\Cookies'
+    # Bind the host as a parameter instead of formatting it into the SQL text.
+    sql = "select name,encrypted_value from cookies where host_key like ?"
+    key = load_local_key(localStateFilePath)
+    conn = sqlite3.connect(cookiepath)
+    try:
+        rows = conn.execute(sql, (f"%.{host}",)).fetchall()
+    finally:
+        # Using the connection in a `with` block only manages the
+        # transaction; close explicitly so the database file is released.
+        conn.close()
+    return {name: decrypt_value(key, encrypted_value) for name, encrypted_value in rows}
+
+
+def get_chrome_path():
+    """Return the Chrome executable path recorded in the Windows registry.
+
+    Reads the ``ApplicationIcon`` value of
+    ``HKEY_CLASSES_ROOT\\ChromeHTML\\Application`` and strips the part after
+    the last comma.
+
+    :return: the path string, or ``None`` when the registry entry is missing
+    """
+    try:
+        # The context manager closes the registry handle after reading.
+        with winreg.OpenKey(winreg.HKEY_CLASSES_ROOT, r"ChromeHTML\Application") as key:
+            path = winreg.QueryValueEx(key, "ApplicationIcon")[0]
+    except FileNotFoundError:
+        return None
+    # rsplit leaves the value intact when it contains no comma, whereas
+    # slicing at rfind(",") == -1 would cut off the last character.
+    return path.rsplit(",", 1)[0]
+
+
+if __name__ == '__main__':
+    # Manual check when run as a script: print the Chrome path found in the
+    # registry (the cookie dump below is left disabled).
+    # print(fetch_host_cookie("douyin.com"))
+    print(get_chrome_path())
--- a/douyin/抖音短视频合集批量下载器v0.2.exe
+++ b/douyin/抖音短视频合集批量下载器v0.2.exe