# Huya live-list crawler.
#
# Provenance: added as "NO24/虎牙.py" by commit
# 737404a1babee8d80641753c908c031c4b0e1592 (Tue, 31 Aug 2021 15:01:34 +0800),
# subject "虎牙直播数据采集". The same commit also added an empty placeholder
# file inside the "NO24/虎牙/" output directory.
#
# The script asks the list endpoint for the total page count, then downloads
# every page concurrently (at most MAX_WORKERS requests in flight) and writes
# each response to "./虎牙/<index>.json".

import os
import random
import threading

import requests

# Directory the per-page JSON files are written to.
OUTPUT_DIR = "./虎牙"
# Page-list endpoint; "{}" is replaced by the 1-based page number.
URL_FORMAT = 'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&tagAll=0&callback=&page={}'
# Wrapper the endpoint puts around the payload when a JSONP callback is used.
JSONP_PREFIX = 'getLiveListJsonpCallback('
# Upper bound on simultaneously running download threads.
MAX_WORKERS = 5
# Per-request timeout in seconds.
REQUEST_TIMEOUT = 5


class Common:
    """Helpers shared by all requests made by this script."""

    def __init__(self):
        pass

    def get_headers(self):
        """Return request headers with a randomly chosen user agent.

        Returns:
            dict: ``user-agent`` and ``referer`` header values.
        """
        # Every entry must be a real, latin-1 encodable user-agent string:
        # the original list held a non-ASCII placeholder, and a header value
        # like that cannot be encoded when the request is sent.
        uas = [
            "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        ]
        ua = random.choice(uas)
        headers = {
            "user-agent": ua,
            "referer": "https://www.baidu.com",
        }
        return headers


def _strip_jsonp(text):
    """Return the JSON payload of *text*, removing a JSONP wrapper if present."""
    text = text.strip()
    # Only unwrap when the wrapper is really there; a plain JSON response
    # (what the endpoint returns for an empty ``callback=``) is left intact
    # instead of losing its final character.
    if text.startswith(JSONP_PREFIX) and text.endswith(')'):
        return text[len(JSONP_PREFIX):-1]
    return text


def run(index, url, semaphore, headers):
    """Download one page and store it on disk.

    Args:
        index: Number used as the output file name (``<index>.json``).
        url: Page URL to fetch.
        semaphore: Semaphore limiting how many downloads run at once.
        headers: HTTP headers sent with the request.
    """
    # ``with`` guarantees the permit is returned even when the request or the
    # write fails; otherwise failed downloads would starve waiting threads.
    with semaphore:
        try:
            res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            res.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: report the failed page and let the others proceed.
            print(f"Request failed for {url}: {exc}")
            return
        res.encoding = 'utf-8'
        save(index, _strip_jsonp(res.text))


def save(index, text):
    """Write *text* to ``./虎牙/<index>.json`` (UTF-8).

    Args:
        index: Number used as the output file name.
        text: Content to write.
    """
    # The directory may be missing when the script runs outside the repo.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(f"./虎牙/{index}.json", "w", encoding="utf-8") as f:
        f.write(f"{text}")
    print("该URL地址数据写入完毕")


def main():
    """Fetch the page count, then download every page concurrently."""
    c = Common()

    # Fetch the first page to learn the total page count.
    first_url = URL_FORMAT.format(1)
    res = requests.get(url=first_url, headers=c.get_headers(), timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    data = res.json()
    if data.get('status') != 200:
        # Without a page count there is nothing to crawl; stop with a clear
        # message instead of failing later on an undefined variable.
        raise SystemExit(f"Unexpected status in first response: {data.get('status')!r}")
    total_page = data['data']['totalPage']

    # Pages are numbered 1..total_page inclusive.
    urls = [URL_FORMAT.format(i) for i in range(1, total_page + 1)]

    # At most MAX_WORKERS threads perform a download at the same time.
    semaphore = threading.BoundedSemaphore(MAX_WORKERS)
    threads = []
    for i, url in enumerate(urls):
        t = threading.Thread(target=run, args=(i, url, semaphore, c.get_headers()))
        t.start()
        threads.append(t)

    # Block until every worker is done (no CPU-burning busy-wait).
    for t in threads:
        t.join()
    print('所有线程运行完毕')


if __name__ == '__main__':
    main()