"""Scrape Taobao search results with Selenium and store them in MongoDB.

The script opens the Taobao home page in Chrome, submits a search, parses
every result page with PyQuery and inserts one document per product into
the MongoDB collection named by ``config.MONGO_TABLE``.
"""
import re

import pymongo
from pymongo.errors import PyMongoError
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from config import MONGO_DB, MONGO_TABLE, MONGO_URL

# How many times a page interaction is attempted before giving up.  The
# original retried forever through recursion, which ends in RecursionError
# when the page never becomes available.
MAX_ATTEMPTS = 5

client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]
browser = webdriver.Chrome()

# Every explicit wait below shares this 10 second timeout.
wait = WebDriverWait(browser, 10)


def search(keyword='xiaomi', max_attempts=MAX_ATTEMPTS):
    """Run a search from the Taobao home page and scrape the first result page.

    Args:
        keyword: Text typed into the search box.
        max_attempts: How many times the whole search is tried when a wait
            times out.

    Returns:
        The text of the pager's "total" element, from which ``main`` extracts
        the number of result pages.

    Raises:
        TimeoutException: If every attempt timed out.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            browser.get('https://www.taobao.com')
            input_ = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#q'))
            )
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')
                )
            )

            input_.send_keys(keyword)
            submit.click()

            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')
                )
            )
            # Read the text before scraping so a later re-render of the pager
            # cannot invalidate the element reference.
            total_text = total.text
            get_products()
            return total_text
        except TimeoutException:
            print('search timed out, attempt', attempt, 'of', max_attempts)
    raise TimeoutException(
        'search did not complete after %d attempts' % max_attempts
    )


def next_page(page_num, max_attempts=MAX_ATTEMPTS):
    """Jump to result page ``page_num`` through the pager form and scrape it.

    Args:
        page_num: Page number to type into the pager's input box.
        max_attempts: How many times the jump is tried when a wait times out.

    Raises:
        TimeoutException: If every attempt timed out.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            input_ = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > input')
                )
            )
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit')
                )
            )
            input_.clear()
            input_.send_keys(str(page_num))
            submit.click()
            # The jump is confirmed once the highlighted pager item shows the
            # requested page number.
            wait.until(EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR,
                 '#mainsrp-pager > div > div > div > ul > li.item.active > span'),
                str(page_num)))
            get_products()
            return
        except TimeoutException:
            print('page', page_num, 'timed out, attempt', attempt, 'of', max_attempts)
    raise TimeoutException(
        'page %s did not load after %d attempts' % (page_num, max_attempts)
    )


def get_products():
    """Parse the currently loaded result page and save every product found.

    Waits for at least one item to be present, then reads the page source
    with PyQuery, prints each product dict and hands it to ``save_to_mongo``.
    """
    wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#mainsrp-itemlist .items .item')
        )
    )
    doc = pq(browser.page_source)
    for item in doc('#mainsrp-itemlist .items .item').items():
        product = {
            'image': item.find('.pic .img').attr('src'),
            'price': item.find('.price').text(),
            # Drops the last three characters of the deal count text
            # (presumably a fixed unit suffix -- confirm against the page).
            'deal': item.find('.deal-cnt').text()[:-3],
            'title': item.find('.title').text(),
            'shop': item.find('.shop').text(),
            'location': item.find('.location').text(),
        }
        print(product)
        save_to_mongo(product)


def save_to_mongo(result):
    """Insert one product document into MongoDB, reporting success or failure.

    Failures are printed rather than raised so one bad insert does not stop
    the crawl.

    Args:
        result: Product dict built by ``get_products``.
    """
    try:
        # Collection.insert was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one is the supported single-document call.
        db[MONGO_TABLE].insert_one(result)
    except PyMongoError as exc:
        print('error to mongo', exc)
    else:
        print('success save to mongodb', result)


def main():
    """Scrape every result page of the search, then release all resources."""
    try:
        total_text = search()
        match = re.search(r'(\d+)', total_text)
        if match is None:
            raise ValueError('no page count found in %r' % total_text)
        total = int(match.group(1))
        # Page 1 was scraped by search(); range() excludes its end value, so
        # + 1 is needed to reach the last page.
        for page in range(2, total + 1):
            try:
                next_page(page)
            except TimeoutException:
                # Keep the crawl best-effort: skip a page that never loads.
                print('giving up on page', page)
    finally:
        # quit() ends the WebDriver session; close() only closes the window.
        browser.quit()
        client.close()


if __name__ == '__main__':
    main()
config.py
# MongoDB connection settings used by the scraper script.
MONGO_URL = 'localhost'  # host passed to pymongo.MongoClient
MONGO_DB = 'taobao'  # database name
MONGO_TABLE = 'product'  # collection that receives the product documents
运行结果:
数据库: