From a55a654c083f806c21a39c97b4bf225371b1ea2c Mon Sep 17 00:00:00 2001 From: hjCodeCloud <7482185+hjcodecloud@user.noreply.gitee.com> Date: Wed, 16 Jun 2021 15:07:48 +0800 Subject: [PATCH] cosplay8 --- NO6/index.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 NO6/index.py diff --git a/NO6/index.py b/NO6/index.py new file mode 100644 index 0000000..32c7201 --- /dev/null +++ b/NO6/index.py @@ -0,0 +1,107 @@ +import requests +import re +import os +import time + +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36", + "host": 'www.cosplay8.com' +} + + +def get_list(url): + """ + 获取全部详情页链接 + """ + all_list = [] + + res = requests.get(url, headers=headers) + html = res.text + pattern = re.compile('
  • ') + all_list = pattern.findall(html) + + return all_list + + +def save_img(path, title, first_img, index): + try: + # 请求图片 + img_res = requests.get( + f"http://www.cosplay8.com{first_img}", headers=headers) + img_data = img_res.content + + with open(f"{path}/{title}_{index}.png", "wb+") as f: + f.write(img_data) + except Exception as e: + print(e) + + +def get_detail(url): + res = requests.get(url=url, headers=headers) + res.encoding = "utf-8" + html = res.text + + # 拆解页码,保存第一张图片 + size_pattern = re.compile('共(\d+)页: ') + # title_pattern = re.compile('(.*?)-Cosplay中国') + title_pattern = re.compile('(.*?)-Cosplay(中国|8)') + first_img_pattern = re.compile("