diff --git "a/NO9/html/\346\265\213\350\257\225\347\224\250.html" "b/NO9/html/\346\265\213\350\257\225\347\224\250.html" new file mode 100644 index 0000000000000000000000000000000000000000..c4b9e5e901345df69764f73009a058487646502b --- /dev/null +++ "b/NO9/html/\346\265\213\350\257\225\347\224\250.html" @@ -0,0 +1,584 @@ + + + + + + + + +Mox.moe [Kindle漫畫|Kobo漫畫|epub漫畫] [vol.moe] + + + + +
+ + +
+ +
+ +
+ + + +
+ + + + +
+     分類: + +  全部  +  幽默  +  愛情  +  競技  +  熱血  +  格鬥  +  冒險  +  恐怖  +  生存  +  懸疑  +  偵探  +  歷史  +  戰爭  +  生活  +  勵志  +  校園  +  職場  +  美食  +  音樂舞蹈  +
+         +  機戰  +  科幻  +  魔幻  +  魔法  +  奇幻  +  神鬼  +  武俠  +  仙俠  +  治癒  +  萌系  +  宅系  +  青年  +  少年  +  少女  +  後宮  +  百合  +  偽娘  +  性轉換  +  耽美 X +
+         +  四格  +  繪本  +  輕小說改編  +  連環畫  +
+     語言: + +  全部  +  中文  +  日語  +  英文  +        地區: + +  全部  +  日本  +  歐美  +  港臺  +  大陸  +  韓國  +         + 本站支持 Kindle 設備自帶瀏覽器瀏覽下載 +
+     篇幅: + +  全部  +  短篇  +  中篇  +  長篇  +        狀態: + +  全部  +  完結  +  連載  +              + 友情鏈接 : Bookshop.tw 購買正版漫畫,台灣直郵 +
+     排序: + +  綜合排序  +  評價排名  +  熱度排序  + +  最近熱門  +  隨機排序  +  最近收錄  +  最近更新  +              + + +
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+  全部 +
+
+ 11430 部 / 545 頁 +
+
+ + + + + + + + + +
+
+
+
+

9.4

+
+ 名偵探柯南
[青山剛昌]
+ 第 1071-1075 話
+ 2021-06-16

+
+
+
+

9.4

+
+ 庫洛魔法使 透明牌篇
[CLAMP]
+ 第 045-054 話
+ 2021-06-25

+
+
+
+

7.8

+
+ 絕命製裁X
[佐藤ショウジ]
+ 第 21 卷
+ 2021-06-25

+
+
+
+

8.8

+
+ 賭博墮天錄-和也篇
[福本伸行]
+ 第 386-390 話
+ 2021-06-21

+
+
+
+

7.8

+
+ 美眉憋不住
[走馬燈,ソウマトウ]
+ 第 06 卷
+ 2021-06-25

+
+
+
+

8.0

+
+ 我妹的時尚宅男改造計畫
[縞野やえ]
+ 第 08 卷
+ 2021-06-25

+
+
+
+

7.2

+
+ 愛吃拉麵的小泉同學
[鳴見なる]
+ 第 08 卷
+ 2021-06-25

+
+
+
+

7.5

+
+ 食戟之靈 L'etoile
[伊藤美智子,昭時大紀,佐伯俊]
+ 第 07 卷
+ 2021-06-25

+
+
+
+

8.1

+
+ 敬啟者:我與殺手小姐結婚了
[高坂曇天]
+ 第 01 卷
+ 2021-06-25

+
+
+
+

7.3

+
+ 世界頂尖的暗殺者轉生為異世界貴族
[月夜涙,れい亜,皇ハマオ]
+ 第 01 卷
+ 2021-06-25

+
+
+
+

8.5

+
+ 戀上月犬男子
[山田南平]
+ 第 021-25.5 話
+ 2021-06-24

+
+
+
+

6.4

+
+ 小林家的龍女僕 露可亞是我的××
[クール教信者,歌麿]
+ 第 01 卷
+ 2021-06-25

+
+
+
+

7.6

+
+ 戰國千年
[藍迪漫娛,時代漫王]
+ 第 156-160 話
+ 2021-06-25

+
+
+
+

6.0

+
+ 事件記者兔兔子
[丸山薫]
+ 第 02 卷
+ 2021-06-25

+
+
+
+

7.5

+
+ 晴空與陰空
[三秋縋,loundraw]
+ 第 02 卷
+ 2021-06-25

+
+
+
+

6.0

+
+ 泡沫之夏
[明曉溪,米沙]
+ 第 03 卷
+ 2021-06-25

+
+
+
+

7.9

+
+ 今際の國のアリス
[麻生羽呂]
+ Vol. 18
+ 2021-06-24

+
+
+
+

7.8

+
+ 為醜女獻上花束
[作楽ロク]
+ 第 08 卷
+ 2021-06-25

+
+
+
+

6.4

+
+ 被瘋狂溺愛的反派大小姐~濃密性愛對象是仆從
[Yuzushiwo]
+ 第 001-007 話
+ 2021-06-25

+
+
+
+

6.0

+
+ 快樂天曆史漫談
[火鳥]
+ 第 02 卷
+ 2021-06-25

+
+
+
+

6.0

+
+ 如果作為冠軍的我成為了公主的小白臉的話。
[藍藤唯,霜降(Laplacian),杠憲太]
+ 第 01 卷
+ 2021-06-25

+
+  全部 +
+
+ 11430 部 / 545 頁 +
+
+ + + + + + + + + +
+
+ +
+ +
+
+
+
+ +
+
+
+
+
+ +
+ + + + + + + + diff --git "a/NO9/imgs/\346\265\213\350\257\225\347\224\250.jpg" "b/NO9/imgs/\346\265\213\350\257\225\347\224\250.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..469fa30f17198f5c2bd6065af0bafdd04c841cf4 Binary files /dev/null and "b/NO9/imgs/\346\265\213\350\257\225\347\224\250.jpg" differ diff --git "a/NO9/\346\225\260\346\215\256\346\217\220\345\217\226\344\273\243\347\240\201.py" "b/NO9/\346\225\260\346\215\256\346\217\220\345\217\226\344\273\243\347\240\201.py" new file mode 100644 index 0000000000000000000000000000000000000000..cc38796421f69c744a7de7afa78a61ea47f8b6a0 --- /dev/null +++ "b/NO9/\346\225\260\346\215\256\346\217\220\345\217\226\344\273\243\347\240\201.py" @@ -0,0 +1,50 @@ +import os +import re +import requests + + +def reade_html(): + path = r"E:\pythonProject\test\html" + files = os.listdir(path) + + for file in files: + file_path = os.path.join(path, file) + with open(file_path, "r", encoding="utf-8") as f: + html = f.read() + img_pattern = re.compile('
(?P.*?)</a> <br /> \[(?P<author>.*?)\] <br />") + score_pattern = re.compile('<p style=".*?"><b>(.*?)</b></p>') + img_urls = img_pattern.findall(html) + details = title_pattern.findall(html) + scores = score_pattern.findall(html) + + # save(details, scores) + for index, url in enumerate(img_urls): + save_img(details[index][1], url) + + +def save(details, scores): + for index, detail in enumerate(details): + my_str = "%s,%s,%s,%s\n" % (detail[1].replace(",", ","), detail[0], detail[2].replace(",", ","), scores[index]) + with open("./comic.csv", "a+", encoding="utf-8") as f: + f.write(my_str) + + +def save_img(title, url): + print(f"正在抓取{title}--{url}") + headers = { + "User-Agent": "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52" + } + try: + res = requests.get(url, headers=headers, allow_redirects=False, timeout=10) + + data = res.content + with open(f"imgs/{title}.jpg", "wb+") as f: + f.write(data) + + except Exception as e: + print(e) + + +if __name__ == '__main__': + reade_html() diff --git "a/NO9/\351\235\231\346\200\201\351\241\265\347\210\254\345\217\226\344\273\243\347\240\201.py" "b/NO9/\351\235\231\346\200\201\351\241\265\347\210\254\345\217\226\344\273\243\347\240\201.py" new file mode 100644 index 0000000000000000000000000000000000000000..81b9aa0e46e9c3403191ac66d3557197c86a88d6 --- /dev/null +++ "b/NO9/\351\235\231\346\200\201\351\241\265\347\210\254\345\217\226\344\273\243\347\240\201.py" @@ -0,0 +1,76 @@ +import requests +import re +import threading +import time +import random + +USER_AGENTS = [ + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", + "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", + "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", + "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", + "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", + "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", + "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", + "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", + "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", +] + + +# 循环获取 URL +def get_image(base_url, index): + headers = { + "User-Agent": random.choice(USER_AGENTS) + + } + + print(f"正在抓取{index}") + try: + res = requests.get(url=base_url, headers=headers, allow_redirects=False, timeout=10) + print(res.status_code) + while res.status_code == 302: + ip_json = requests.get("http://118.24.52.95:5010/get/", headers=headers).json() + ip = ip_json["proxy"] + proxies = { + "http": ip, + "https": ip + } + print(proxies) + res = requests.get(url=base_url, headers=headers, proxies=proxies, allow_redirects=False, timeout=10) + time.sleep(5) + print(res.status_code) + + else: + html = res.text + with open(f"html/{index}.html", "w+", encoding="utf-8") as f: + f.write(html) + + semaphore.release() + except Exception as e: + print(e) + print("睡眠10s,再去抓取") + time.sleep(10) + get_image(base_url, index) + + +if __name__ == '__main__': + num = 0 + # 最多开启5个线程 + semaphore = threading.BoundedSemaphore(5) + lst_record_threads = [] + for index in range(1, 525): + semaphore.acquire() + t = threading.Thread(target=get_image, args=( + f"https://vol.moe/l/all,all,all,sortpoint,all,all,BL/{index}.htm", index)) + t.start() + lst_record_threads.append(t) + + for rt in lst_record_threads: + rt.join()