"""Scraper helpers for www.cosplay8.com (originally ``NO6/index.py``).

Downloads pages with ``requests``, extracts data with regular expressions
and writes downloaded images to disk.
"""
import os
import re
import time

import requests

# Headers sent with every request: a desktop Chrome user agent plus an
# explicit Host header for the target site.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
    "host": 'www.cosplay8.com'
}

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` may block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def get_list(url):
    """Return all detail-page link matches found on the page at *url*.

    Args:
        url: Address of the listing page to download.

    Returns:
        The list produced by running the link pattern's ``findall`` over
        the response text (empty when nothing matches).

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = res.text

    # NOTE(review): this literal is reproduced exactly as it appears in the
    # available source, where it looks like its HTML markup (and any capture
    # group) was stripped during extraction. Restore the real pattern from
    # the original file before relying on this function.
    pattern = re.compile('\n  • ')
    return pattern.findall(html)


def save_img(path, title, first_img, index):
    """Download one image and store it as ``{path}/{title}_{index}.png``.

    This is a best-effort operation: any download or write failure is
    printed and the function returns normally, so one bad image does not
    abort the caller.

    Args:
        path: Directory the image is written into (must already exist).
        title: Leading part of the output file name.
        first_img: Site-relative image path, appended to
            ``http://www.cosplay8.com``.
        index: Number appended to the file name after an underscore.

    Returns:
        None.
    """
    try:
        # Request the image.
        img_res = requests.get(
            f"http://www.cosplay8.com{first_img}",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        # Reject 4xx/5xx responses so an error page is never saved as an
        # image file.
        img_res.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return

    try:
        # Write-only binary mode is sufficient; the data is never read back.
        with open(f"{path}/{title}_{index}.png", "wb") as f:
            f.write(img_res.content)
    except (OSError, ValueError) as e:
        # OSError: missing directory, permissions, invalid file name.
        # ValueError: e.g. an embedded NUL character in the built path.
        print(e)


# NOTE(review): ``get_detail(url)`` is defined at this point in the original
# file, but the available source is cut off in the middle of that function
# (inside a ``re.compile`` string literal) and its patterns show the same
# stripped-markup damage as above. It is intentionally not reproduced here
# and must be restored from the original file.