提交 30eb3273 编写于 作者: F feilong

改进代码

上级 94d7588d
...@@ -9,7 +9,7 @@ from scrapy.crawler import CrawlerProcess ...@@ -9,7 +9,7 @@ from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings from scrapy.settings import Settings
class StackOverflowTagSpider(scrapy.Spider): class StackOverflowTagSpider(scrapy.Spider):
name = "vscode_tags" name = "stackoverflow_tags"
allowed_domains = ["visualstudio.com"] allowed_domains = ["visualstudio.com"]
start_urls = ['https://stackoverflow.com/tags/synonyms?page=1'] start_urls = ['https://stackoverflow.com/tags/synonyms?page=1']
custom_settings = { custom_settings = {
......
...@@ -7,29 +7,14 @@ import scrapy ...@@ -7,29 +7,14 @@ import scrapy
from scrapy.crawler import CrawlerProcess from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings from scrapy.settings import Settings
class CategoryItem(scrapy.Item):
name = scrapy.Field()
addr = scrapy.Field()
class TagItem(scrapy.Item):
name = scrapy.Field()
class VSCodeTagSpider(scrapy.Spider): class VSCodeTagSpider(scrapy.Spider):
name = "vscode_tags" name = "vscode_tags"
allowed_domains = ["visualstudio.com"] allowed_domains = ["visualstudio.com"]
# start_urls = ['https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs'] start_urls = ['https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs']
start_urls = ['https://stackoverflow.com/tags/synonyms?page=1']
def parse(self, response): def parse(self, response):
print('todo') print('todo')
class Categoryline(object):
def process_item(self, item, spider):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0'}
req = urllib.request.Request(url=item['addr'],headers=headers)
res = urllib.request.urlopen(req)
def fetch(): def fetch():
settings = Settings() settings = Settings()
process = CrawlerProcess() process = CrawlerProcess()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册