首页 > 解决方案 > 循环产品链接以抓取数据

问题描述

我想遍历所有页面上的产品链接,并让驱动程序依次打开每个产品链接来抓取数据。但实际运行时只打开了第 1 个产品,之后不会继续跳转到下一个产品链接。请帮我解决这个问题,提前致谢。

import xlwt
from selenium import webdriver
import re
import time
from datetime import date

class expert:
    """Scraper for the smartphone listing page of expert938.expertonline.it.

    Attributes:
        url: Listing page that is opened first.
        country: Country code recorded for the scraped products.
        currency: Currency label recorded for the scraped products.
        VAT: Whether prices include VAT.
    """

    def __init__(self):
        self.url = 'https://expert938.expertonline.it/dm-IT-it/Vendita_Smartphone_W8D.aspx'
        self.country = 'IT'
        self.currency = 'euro'
        self.VAT = 'Included'

    def experts(self):
        """Open the listing page and visit every product link found on it.

        A workbook containing only the header row is saved first. Then all
        product URLs on the listing page are collected, and only afterwards
        is each one opened in turn. The browser is closed when the method
        finishes, including when an error is raised.

        NOTE: only the first listing page is processed; no pagination is
        implemented here.
        """
        wb = xlwt.Workbook()
        ws = wb.add_sheet('Sheet1', cell_overwrite_ok=True)
        headers = (
            "Product_Url",
            "Product_Manufacturer",
            "Product_Model",
            "Product_color",
            "Product_memory",
            "Product_Price",
            "Currency",
            "VAT",
            "Shipping Cost",
            "Country",
            "Date",
        )
        for col, title in enumerate(headers):
            ws.write(0, col, title)
        wb.save(r"C:\Users\Karthick R\Desktop\VS code\expert938.xls")

        driver = webdriver.Chrome()
        try:
            driver.get(self.url)
            time.sleep(5)
            driver.maximize_window()

            containers = driver.find_elements_by_css_selector(
                'div[class="col-xs-12 skywalker_riga skywalker_riga_articolo"]'
            )

            # Collect every href as a plain string BEFORE navigating anywhere.
            # Calling driver.get() while still iterating over the containers
            # leaves the listing page, which makes the remaining WebElements
            # stale; that is why only the first product used to open.
            product_links = []
            for container in containers:
                wrapper = container.find_element_by_css_selector(
                    'div[class="text-center relative-container"]'
                )
                href = wrapper.find_element_by_tag_name('a').get_attribute('href')
                if href:  # skip anchors without a usable href
                    product_links.append(href)
            print(product_links)

            # Safe to navigate now: the loop only depends on strings.
            for link in product_links:
                driver.get(link)
                time.sleep(10)
        finally:
            # Always release the browser and the chromedriver process.
            driver.quit()
# Run the scraper only when this file is executed as a script, so that
# importing the module does not launch a browser as a side effect.
if __name__ == "__main__":
    expertit = expert()
    expertit.experts()

标签: python selenium

解决方案


尝试将

url = container.find_element_by_css_selector('div[class="text-center relative-container"]')

改为

url = container.find_element_by_xpath('.//div[@class="text-center relative-container"]')

推荐阅读