import re
import time

import requests

# Declare the User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"
}
# Stores paths that raised errors, to guard against failed crawls.
error_list = []


# Crawler entry point.
def run():
    """Fetch the Ultraman index page and pass its HTML to ``get_detail_list``.

    Returns:
        Whatever ``get_detail_list(html)`` returns, or ``None`` when the
        request, the status check, or the parsing step raised. Failures are
        reported on stdout rather than propagated (best-effort entry point).
    """
    url = "http://www.ultramanclub.com/allultraman/"
    try:
        # The site responds slowly, so a timeout is required.
        res = requests.get(url=url, headers=headers, timeout=10)
        # Treat 4xx/5xx responses as failures instead of parsing an error
        # page as if it were the listing page.
        res.raise_for_status()
        # Decode the response body as gb2312 when reading ``res.text``.
        res.encoding = "gb2312"
        html = res.text
        return get_detail_list(html)
    except Exception as e:
        # Deliberately broad: this is the top-level boundary of the crawler,
        # so any failure is printed and the run ends without a traceback.
        print("请求异常", e)
        return None


# Get all Ultraman detail pages.
# NOTE(review): the definition of ``get_detail_list(html)`` begins at this
# point in the original, but it is cut off mid-statement (``start_index = '``)
# in the visible source, so its body is not reproduced here.