"""Producer/consumer image crawler for image.fengniao.com (demo.py).

A ``Product`` thread walks the paginated list API and queues the image URLs
it finds on each linked page; a ``Consumer`` thread drains that queue and
writes every image to ``./fengniaos/``.
"""
from six import indexbytes
from http_help import R  # get this file from the previous blog post, or from GitHub
import os
import threading
import time
import json
import re

img_list = []  # pending work items, each {"title": ..., "urls": [...]}
imgs_lock = threading.Lock()  # guards every access to img_list

# Captures the value of every "pic_url_1920_b" field in a fetched page.
# Compiled once here instead of once per list item.
_PIC_URL_PATTERN = re.compile('"pic_url_1920_b":"(.*?)"')

# How long the consumer waits before re-checking an empty queue.
_EMPTY_QUEUE_WAIT_SECONDS = 0.1


# Producer
class Product(threading.Thread):
    """Producer thread: pages through the list API and fills ``img_list``."""

    def __init__(self):
        super().__init__()

        self.__headers = {"Referer": "http://image.fengniao.com/",
                          "Host": "image.fengniao.com",
                          "X-Requested-With": "XMLHttpRequest"
                          }
        # URL template; "{}" receives the page number.  The last parameter
        # had been corrupted to "¬_in_id" (an HTML-entity decoding of
        # "&not"), which glued it onto the page value.
        self.__start = "http://image.fengniao.com/list.php?action=getList&class_id=192&sub_classid=1587&page={}&not_in_id=0"
        self.__res = R(headers=self.__headers)

    def run(self):
        """Request list pages forever, queueing the image URLs found.

        The number of pages is not known in advance, hence the open-ended
        loop; the thread never returns on its own.
        """
        # NOTE(review): the previous comment said the start page was 1 while
        # the code started at 2; the value is kept - confirm the intended page.
        index = 2

        while True:
            url = self.__start.format(index)
            print("开始操作:{}".format(url))
            index += 1

            content = self.__res.get_content(url, charset="gbk")

            # Throttle every page request, failed ones included, so an
            # outage does not turn this into a tight request loop.
            time.sleep(3)

            if content is None:
                print("数据可能已经没有了====")
                continue

            try:
                json_content = json.loads(content)
            except ValueError as e:
                # Not JSON (e.g. an error page): skip this page instead of
                # letting the exception kill the thread.
                print(e)
                continue

            if json_content["status"] != 1:
                continue

            for item in json_content["data"]:
                title = item["title"]
                child_url = item["url"]

                img_content = self.__res.get_content(
                    child_url, charset="gbk")
                if img_content is None:
                    # get_content() returns None on failure; findall(None)
                    # would raise TypeError.
                    continue

                imgs_json = _PIC_URL_PATTERN.findall(img_content)
                if not imgs_json:
                    continue

                # A dict (title + URL list) is queued so that a later
                # version can create one folder per title.
                with imgs_lock:
                    img_list.append({"title": title, "urls": imgs_json})


# Consumer
class Consumer(threading.Thread):
    """Consumer thread: downloads every queued image into ``./fengniaos/``."""

    def __init__(self):
        super().__init__()
        self.__res = R()

    def run(self):
        """Take work items off ``img_list`` forever and save their images."""
        # Create the output directory up front so it no longer has to be
        # made by hand before the first run.
        os.makedirs("./fengniaos", exist_ok=True)

        while True:
            # Test and pop under the same lock so the emptiness check cannot
            # go stale between the check and the removal.
            with imgs_lock:
                data = img_list.pop(0) if img_list else None

            if data is None:
                # Nothing queued yet: yield the CPU instead of spinning.
                time.sleep(_EMPTY_QUEUE_WAIT_SECONDS)
                continue

            # Drop the backslashes from the scraped URLs (presumably
            # JSON-escaped "\/" sequences - confirm against a live page).
            urls = [url.replace("\\", "") for url in data["urls"]]

            for item_url in urls:
                try:
                    file = self.__res.get_file(item_url)
                    if file is None:
                        # Download failed; do not leave an empty .jpg behind.
                        print("下载失败:{}".format(item_url))
                        continue
                    with open("./fengniaos/{}".format(str(time.time())+".jpg"), "wb+") as f:
                        f.write(file)
                except Exception as e:
                    # Best effort: report the failure and move on to the
                    # next image.
                    print(e)


if __name__ == '__main__':
    p = Product()
    p.start()

    c = Consumer()
    c.start()
0000000000000000000000000000000000000000..0a16584f405dcfaef8ee8166b20f3c266455a952 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940942.8817163.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.018818.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940943.018818.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..3e316c41bfe2f805fc9ec73b283e3c8d6915b6a0 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.018818.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.2291882.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940943.2291882.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..5c3a89075976ee6d625bf6b4c209953a4c37e515 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.2291882.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.336363.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940943.336363.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..4eecf96cc9f3694204243ec96d5094194cdd8403 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.336363.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.4812524.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940943.4812524.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..708b42b4890ea211f9d39d43ae2b7b9006005773 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.4812524.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.5971122.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940943.5971122.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..97e62260f51bd1ce857fc9972d4555bc4bbbab95 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.5971122.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.8396525.jpg" 
"b/\346\241\210\344\276\2136/fengniaos/1626940943.8396525.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..20a5c3bc4fa0277f001b1bffa602a22d86658493 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.8396525.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940943.9953933.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940943.9953933.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..a1f3155ac925b17c6be3739bbaa43ecd1f5b1249 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940943.9953933.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940944.120482.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940944.120482.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..73271d7b6755ed51ffb93ac7195ff9a3ef9a7a50 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940944.120482.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940944.348597.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940944.348597.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..bdbbacf1c26c4061a4fd66f95fc4e3fc4f3b0b8d Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940944.348597.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940944.4909654.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940944.4909654.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..74d90e0a844f1ab51ec2820990ba057d9a331038 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940944.4909654.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940944.6896877.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940944.6896877.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..09e45b2aa3671e97be9f2ca42d227b0d7e0cbf06 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940944.6896877.jpg" differ diff --git 
"a/\346\241\210\344\276\2136/fengniaos/1626940944.8146625.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940944.8146625.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..a55606d7a34e3a79b6a5064bad649891e03b3f35 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940944.8146625.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940944.9312336.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940944.9312336.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..5948274302a8ff213c69fc0817aad42f592b95e8 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940944.9312336.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940945.1559083.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940945.1559083.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..d7a723da1c59ca8e0d9c7863ce7c268c02aa9b28 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940945.1559083.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940945.27445.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940945.27445.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..930345c1f9b98bde13a45fcf0e048363f52bb40c Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940945.27445.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940945.4233613.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940945.4233613.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..a669a8c544d95ff4315d1795ea0a8bf3f82330c1 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940945.4233613.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940945.5736575.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940945.5736575.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..0938d1d4a60ae7c45f1c7b6d7acb63a33cd1e914 Binary files /dev/null and 
"b/\346\241\210\344\276\2136/fengniaos/1626940945.5736575.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940945.6770055.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940945.6770055.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..a327a520933d1e9a445c6198644b5dc6cbead7b4 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940945.6770055.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940945.851344.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940945.851344.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..40b6df3f3fb695243ee4f959af6eb4e5d2f0d264 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940945.851344.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940946.0003598.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940946.0003598.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..fcdc2c07290b5043e145e6a2abc3951c2b6924e7 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940946.0003598.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940946.1573005.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940946.1573005.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..2ffe25822322f788244380ab684db5343bc943f4 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940946.1573005.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940946.5043745.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940946.5043745.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..f3c99e392128a64a902f06a95dc843deb4f01521 Binary files /dev/null and "b/\346\241\210\344\276\2136/fengniaos/1626940946.5043745.jpg" differ diff --git "a/\346\241\210\344\276\2136/fengniaos/1626940946.676191.jpg" "b/\346\241\210\344\276\2136/fengniaos/1626940946.676191.jpg" new file mode 100644 index 
"""Small HTTP helper built on ``requests`` with automatic retries (http_help.py)."""
import requests
from retrying import retry
import random
import datetime


class R:
    """Issue GET/POST requests with a random User-Agent and up to 3 attempts.

    ``get_content`` and ``get_file`` return ``None`` instead of raising when
    every attempt fails.
    """

    # Pool of User-Agent strings; one is picked at random per instance.
    # The first two entries used to be fused into a single bogus value by a
    # missing comma (implicit string-literal concatenation).
    _USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    )

    def __init__(self, method="get", params=None, headers=None, cookies=None):
        """Store the request settings used by every later call.

        Args:
            method: ``"get"`` selects ``requests.get``; any other value
                selects ``requests.post``.
            params: passed as ``params=`` to ``requests.post`` (not used for
                GET requests).
            headers: optional dict merged over the generated User-Agent
                header; its entries win on key clashes.
            cookies: passed through to ``requests`` unchanged.
        """
        self.__method = method
        myheaders = self.get_headers()
        if headers is not None:
            myheaders.update(headers)
        self.__headers = myheaders
        self.__cookies = cookies
        self.__params = params

    def get_headers(self):
        """Return a new headers dict holding one randomly chosen User-Agent."""
        return {'User-Agent': random.choice(self._USER_AGENTS)}

    @retry(stop_max_attempt_number=3)
    def __retrying_requests(self, url):
        """Fetch *url* and return the raw response body as bytes.

        Any exception raised by ``requests`` triggers a retry, up to three
        attempts in total; the last exception then propagates.
        """
        if self.__method == "get":
            response = requests.get(url, headers=self.__headers, cookies=self.__cookies, timeout=3)
        else:
            response = requests.post(url, params=self.__params, headers=self.__headers, cookies=self.__cookies, timeout=3)
        return response.content

    def get_content(self, url, charset="utf-8"):
        """Return the body of *url* decoded with *charset*, or None on failure.

        Failure covers both the request (after all retries) and the decode.
        """
        try:
            html_str = self.__retrying_requests(url).decode(charset)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            html_str = None
        return html_str

    def get_file(self, file_url):
        """Return the raw bytes served at *file_url*, or None on failure."""
        try:
            file = self.__retrying_requests(file_url)
        except Exception:
            # Same reasoning as get_content(): do not swallow interrupts.
            file = None
        return file