From 85758e8149c90d07c9604becd6df7b9c05caeb9f Mon Sep 17 00:00:00 2001 From: 1_bit <757164220@qq.com> Date: Sun, 13 Sep 2020 22:08:16 +0800 Subject: [PATCH] Add new file --- Browser.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 Browser.py diff --git a/Browser.py b/Browser.py new file mode 100644 index 0000000..76b2665 --- /dev/null +++ b/Browser.py @@ -0,0 +1,84 @@ +from selenium import webdriver +from bs4 import BeautifulSoup +from SearchEngine import EngineConfManage +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.common.by import By +import hashlib +import time +import xlwt + +class Browser: + def __init__(self,conf): + self.browser=webdriver.Chrome() + self.conf=conf + self.conf['kw']='' + self.engine_conf=EngineConfManage().get_Engine_conf(conf['engine']).get_conf() + # Set the search keyword + def set_kw(self,kw): + self.conf['kw']=kw + # Type the search keyword into the search engine's input box + def send_keyword(self): + input = self.browser.find_element_by_id(self.engine_conf['searchTextID']) + input.send_keys(self.conf['kw']) + # Click the search button + def click_search_btn(self): + search_btn = self.browser.find_element_by_id(self.engine_conf['searchBtnID']) + search_btn.click() + # Get the search results and their text. NOTE(review): the while-loop further down looks truncated in this copy of the patch; confirm against the original Browser.py + def get_search_res_url(self): + res_link={} + WebDriverWait(self.browser,timeout=30,poll_frequency=1).until(EC.presence_of_element_located((By.ID, "page"))) + # Parse the page content with BeautifulSoup + content=self.browser.page_source + soup = BeautifulSoup(content, "html.parser") + search_res_list=soup.select('.'+self.engine_conf['searchContentHref_class']) + while len(res_link)100: + return False + return True +class BrowserManage(Browser): + # Open the target search engine and run the search + def search(self): + self.browser.get(self.engine_conf['website']) # open the search engine site + self.send_keyword() # type the search keyword + self.click_search_btn() # click the search button + return self.get_search_res_url() # collect the search data from the result page + + + \ No newline at end of file -- GitLab