提交 8690fb95 编写于 作者: 梦想橡皮擦

41案例,协程最后一篇

上级 1ad7750a
import time
import asyncio
import aiohttp
from bs4 import BeautifulSoup
async def get_title(semaphore, url):
    """Fetch one listing page and print the hot-event titles found on it.

    The shared semaphore caps how many coroutines may be fetching at the
    same time; everything outside the request still runs on one event loop.
    """
    async with semaphore:
        print("正在采集:", url)
        async with aiohttp.request('GET', url) as res:
            page = await res.text()
            parsed = BeautifulSoup(page, 'html.parser')
            tags = parsed.find_all(attrs={'class': 'item-title'})
            names = [tag.a.text for tag in tags]
            print(names)
async def main():
    """Fan out one fetch task per listing page and wait for all of them."""
    semaphore = asyncio.Semaphore(10)  # at most 10 fetches in flight at once
    urls = ("http://www.lishiju.net/hotevents/p{}".format(i) for i in range(111))
    tasks = [asyncio.ensure_future(get_title(semaphore, u)) for u in urls]
    dones, pendings = await asyncio.wait(tasks)
    # Results are printed inside get_title; `dones` is kept only so the
    # finished tasks could be inspected (e.g. task.result()) if needed.
if __name__ == '__main__':
    start_time = time.perf_counter()
    # asyncio.run() creates, runs and closes the event loop in one call;
    # the pre-3.7 equivalent was get_event_loop() + run_until_complete(main()).
    asyncio.run(main())
    print("代码运行时间为:", time.perf_counter() - start_time)
import threading
import time
import requests
from bs4 import BeautifulSoup
class MyThread(threading.Thread):
    """Worker thread: download one listing page and print its hot-event titles.

    Relies on a module-level ``semaphore`` (created in the ``__main__`` block)
    to limit how many workers run their bodies concurrently.
    """

    def __init__(self, url):
        threading.Thread.__init__(self)
        self.__url = url  # page to scrape; private to this worker

    def run(self):
        # Use the semaphore as a context manager: the original paired
        # acquire()/release() manually, so an exception in requests.get()
        # (timeout, connection error) leaked the permit and could starve
        # the remaining workers. `with` releases it on every exit path.
        with semaphore:  # counter -1 on entry, +1 on exit
            print("正在采集:", self.__url)
            res = requests.get(url=self.__url)
            soup = BeautifulSoup(res.text, 'html.parser')
            title_tags = soup.find_all(attrs={'class': 'item-title'})
            event_names = [item.a.text for item in title_tags]
            print(event_names)
            print("")
if __name__ == "__main__":
    semaphore = threading.Semaphore(5)  # at most 5 workers inside run() at once
    start_time = time.perf_counter()
    # range(111) yields pages p0..p110, i.e. 111 worker threads in total.
    threads = [MyThread(url="http://www.lishiju.net/hotevents/p{}".format(i))
               for i in range(111)]
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()  # block until every worker has finished
    print("累计耗时:", time.perf_counter() - start_time)
    # 累计耗时: 2.8005530640000003
import time
import asyncio
import aiohttp
from bs4 import BeautifulSoup
async def get_title(session, url):
    """Fetch *url* through the shared client session and print its titles."""
    async with session.get(url) as res:
        print("正在采集:", url)
        page = await res.text()
        parsed = BeautifulSoup(page, 'html.parser')
        tags = parsed.find_all(attrs={'class': 'item-title'})
        print([tag.a.text for tag in tags])
async def main():
    """Schedule every page fetch on one shared session and await them all."""
    # Here concurrency is throttled by the connector's connection limit
    # instead of an explicit semaphore.
    connector = aiohttp.TCPConnector(limit=1)
    async with aiohttp.ClientSession(connector=connector) as session:
        coros = (get_title(session, "http://www.lishiju.net/hotevents/p{}".format(i))
                 for i in range(111))
        tasks = [asyncio.ensure_future(c) for c in coros]
        await asyncio.wait(tasks)
if __name__ == '__main__':
    start_time = time.perf_counter()
    asyncio.run(main())  # run the whole scrape on a fresh event loop
    print("代码运行时间为:", time.perf_counter() - start_time)
import threading
import time
import requests
from bs4 import BeautifulSoup
class MyThread(threading.Thread):
    """Unthrottled worker: download one page and print its hot-event titles."""

    def __init__(self, url):
        threading.Thread.__init__(self)
        self.__url = url  # target page for this worker

    def run(self):
        print("正在采集:", self.__url)
        response = requests.get(url=self.__url)
        document = BeautifulSoup(response.text, 'html.parser')
        found = document.find_all(attrs={'class': 'item-title'})
        print([node.a.text for node in found])
        print("")
if __name__ == "__main__":
    start_time = time.perf_counter()
    # range(111) covers pages p0..p110 — 111 threads, all started at once
    # with no concurrency cap.
    threads = [MyThread(url="http://www.lishiju.net/hotevents/p{}".format(i))
               for i in range(111)]
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()  # wait for every download to complete
    print("累计耗时:", time.perf_counter() - start_time)
    # 累计耗时: 1.537718624
@@ -79,11 +79,11 @@
37. [python 爬虫爱好者必须掌握的知识点“ 协程爬虫”,看一下如何用 gevent 采集女生用头像](https://dream.blog.csdn.net/article/details/120421824)
38. [python协程总学不会?不可能的,边学协程边采集Coser图吧!](https://dream.blog.csdn.net/article/details/120445004)
39. 中少绘本 MP4 视频采集,asyncio 协程第3篇
40. Bensound 站 MP3 采集,asyncio + aiohttp 协程第4篇
41. 历史剧网采集,协程并发控制
39. [你是不是已经成为【爸爸程序员】了?用Python给自己的宝下载200+绘本动画吧,协程第3遍学习](https://dream.blog.csdn.net/article/details/120463479)
40. [python 协程第4课,目标数据源为 mp3 ,目标站点为 bensound.com](https://dream.blog.csdn.net/article/details/120507981)
41. [python 协程补个知识点,控制并发数,python 数据采集必会技能](https://dream.blog.csdn.net/article/details/120879805)
### 📘 scrapy 库学习
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册