# Commit 4e2f1d9d, authored by Love And Program
#
# Commit message: update CHANGELOG, TB.py
#
# Parent commit: (not captured in this extract)
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from time import sleep
# =============================================================================
# Script outline:
# 1. Open Taobao
# 2. Find the login page and sign in to the account
# 3. Find the product and purchase it
# 4. Flash-sale mode: buy as quickly as possible
# =============================================================================
options = webdriver.ChromeOptions()
# Hide window.navigator.webdriver by disabling the Blink runtime feature
# "AutomationControlled".
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument('disable-infobars')
# Switch to developer mode so the site is less likely to detect automation;
# Chrome then stops showing the "controlled by automated test software" notice.
options.add_experimental_option(
    'excludeSwitches', ['enable-automation'])
# Module-level browser session; Running, Search and Find_info all use it.
driver = webdriver.Chrome(options=options)
driver.maximize_window()
#driver.get("http://www.taobao.com")
#info=input("搜索内容")
# Open the login page directly; the redirectURL query parameter is the
# URL-encoded form of http://www.taobao.com/.
driver.get("https://login.taobao.com/member/login.jhtml?spm=a21bo.jianhua.201864-2.d1.5af911d9LLFl7P&f=top&redirectURL=http%3A%2F%2Fwww.taobao.com%2F")
def _fill_login_field(xpath, text):
    """Hover over the input located by *xpath*, click it and type *text*."""
    field = driver.find_element(By.XPATH, xpath)
    ActionChains(driver).move_to_element(field).perform()
    sleep(0.5)
    field.click()
    field.send_keys(text)
    sleep(0.5)


def Running(user, pswd):
    """Fill in and submit the login form on the page currently loaded.

    Args:
        user: Text typed into the input with id "fm-login-id".
        pswd: Text typed into the input with id "fm-login-password".

    After clicking the button labelled "登录", the current window handle and
    the current URL are printed. Any exception raised along the way is
    printed and swallowed, so the function always returns None.
    """
    try:
        _fill_login_field('//*[@id="fm-login-id"]', user)
        _fill_login_field('//*[@id="fm-login-password"]', pswd)
        # Submit the form.
        login_button = driver.find_element(By.XPATH, '//button[text()="登录"]')
        login_button.click()
        sleep(0.5)
        # Handle of the window the driver is currently focused on.
        print(driver.current_window_handle)
        print("当前页面的url是:", driver.current_url)
    except Exception as e:
        print(e)
def Search(url, keyword='充电宝'):
    """Open *url*, type *keyword* into its search box and submit the search.

    Args:
        url: Page to load. It must contain an input named "q" and a button
            whose text is "搜索".
        keyword: Text to search for. Defaults to '充电宝', the value that was
            previously hard-coded, so existing one-argument calls behave the
            same.

    The browser URL is printed once before navigating and once after the
    search button is clicked. No exceptions are caught here; lookup or
    interaction failures propagate to the caller.
    """
    print("当前页面的url是:", driver.current_url)
    driver.get(url)
    # Locate the search box and enter the keyword.
    search_box = driver.find_element(By.XPATH, '//input[@name="q"]')
    search_box.send_keys(keyword)
    # click() returns None, so its result is neither kept nor printed.
    driver.find_element(By.XPATH, '//button[text()="搜索"]').click()
    print("当前页面的url是:", driver.current_url)
def Find_info(hrefs_, imgs_list, texts_list, offset=44):
    """Collect result links, image URLs and titles, then load another page.

    Args:
        hrefs_: List that receives the ``href`` of every title link.
        imgs_list: List that receives the ``src`` of every result image.
        texts_list: List that receives the stripped ``textContent`` of every
            title link.
        offset: Value placed in the ``s`` query parameter of the URL that is
            loaded afterwards. Defaults to 44, the value that was previously
            hard-coded.

    Returns:
        The same three lists, in the order (hrefs_, imgs_list, texts_list).
        They are extended in place.
    """
    sleep(0.5)
    # One pass over the title anchors yields both the link and the title, so
    # hrefs_ and texts_list stay aligned entry for entry.
    for link in driver.find_elements(By.XPATH, '//div[@class="row row-2 title"]/a'):
        sleep(0.1)
        hrefs_.append(link.get_attribute('href'))
        # The title may be hidden (is_displayed() can tell), so textContent
        # is read instead of relying on the visible text.
        texts_list.append(link.get_attribute('textContent').strip())
    for image in driver.find_elements(By.XPATH, '//div[@class="pic"]/a/img'):
        sleep(0.1)
        imgs_list.append(image.get_attribute('src'))
    # Paging option 1: jump to another results page by loading its URL
    # directly. Option 2 (not used here) would be to type a page number into
    # the pager and press the confirm button.
    # NOTE(review): the parameters are appended to the current URL as-is, so
    # calling this repeatedly accumulates duplicate query parameters.
    next_url = f'{driver.current_url}&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s={offset}'
    driver.get(next_url)
    return hrefs_, imgs_list, texts_list
# Log in. The two literals mean "account" and "password" — presumably
# placeholders to be replaced with real credentials.
Running('账号','密码')
# NOTE(review): the implicit wait is configured only after the login step, so
# the element lookups inside Running ran without it — confirm this is intended.
driver.implicitly_wait(10)
sleep(10)
# Load the home page and run the search.
Search("http://www.taobao.com")
# Find every element with id = item J_MouserOnverReq item-ad and scrape it.
Hrefs = []
Imgs = []
Texts = []
Find_info(Hrefs,Imgs,Texts)
# =============================================================================
# Page 3
# https://s.taobao.com/search?q=%E5%85%85%E7%94%B5%E5%AE%9D&imgfile=
# &commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.
# jianhua.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306
# &bcoffset=-2&ntoffset=-2&p4ppushleft=2%2C48&s=88
# Page 2
# https://s.taobao.com/search?q=%E5%85%85%E7%94%B5%E5%AE%9D&imgfile=
# &commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.
# jianhua.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306
# &bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=44
# Page 1
# https://s.taobao.com/search?q=%E5%85%85%E7%94%B5%E5%AE%9D&imgfile=&
# commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.
# jianhua.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306
# &bcoffset=4&ntoffset=4&p4ppushleft=2%2C48&s=0
# =============================================================================
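# (The lines below are residue from the code-hosting web page, not part of the script.)
# Markdown is supported
# 0% .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# To comment, please register.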