python - 如何前往网站的下一页 我在导航到下一页时遇到问题
问题描述
请任何人帮助我。我正在尝试浏览页面,但我不知道我的代码不起作用。我得到了第一页的产品详细信息,我想抓取网站中所有页面的详细信息。以下是我的代码,请检查您的参考。提前致谢
以下是网站链接 https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60
import xlwt
from selenium import webdriver
import re
import time
from datetime import date
class kotsovolosmobiles:
def __init__(self):
self.url='https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60'
self.country='GR'
self.currency='euro'
self.VAT= 'Included'
self.shipping = 'Available for shipment'
self.Pre_PromotionPrice ='N/A'
def kotsovolos(self):
wb = xlwt.Workbook()
ws = wb.add_sheet('Sheet1',cell_overwrite_ok=True)
ws.write(0,0,"Product_Url")
ws.write(0,0,"Product_Manufacturer")
ws.write(0,1,"Product_Url")
ws.write(0,2,"Product_Price")
ws.write(0,3,"Product_Model")
ws.write(0,4,"Memory")
ws.write(0,5,"Currency")
ws.write(0,6,"Color")
ws.write(0,7,"VAT")
ws.write(0,8,"Shipping Cost")
ws.write(0,9,"Pre-PromotionPrice")
ws.write(0,10,"Country")
ws.write(0,11,"Date")
ws.write(0,12,"Raw_Model")
wb.save(r"C:\Users\Karthick R\Desktop\VS code\kotsovolos.xls")
driver=webdriver.Chrome()
driver.get(self.url)
today = date.today()
time.sleep(5)
cookies = driver.find_element_by_css_selector('a[id="CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"]')
cookies.click()
print("cookies accepted")
driver.maximize_window()
time.sleep(5)
titles = []
models = []
memorys = []
prod_prices = []
p_links =[]
p_colors = []
while True:
storage_box = []
storage_box = driver.find_elements_by_css_selector('div[class="product"]')
for storage_boxes in storage_box:
product_url = storage_boxes.find_element_by_css_selector('div[class="title"] a').get_attribute('href')
print(product_url)
p_links.append(product_url)
p_model = storage_boxes.find_element_by_css_selector('div[class="title"] a').text
print(p_model)
models.append(p_model)
manufacturer1 = p_model.split(" ")
print(manufacturer1[0])
titles.append(manufacturer1[0])
memory = []
memory = re.findall('\d+ ?[gG][bB]',p_model)
print(memory)
memory1 = str(memory).replace("['",'').replace("']",'').replace("[]",'').strip()
if "," in memory1:
arr=memory1.split(",")
for str1 in arr:
str2=str1.replace("GB", "").replace("gb", "").replace("'", "").strip()
if len(str2)!=1:
memory_str=str1
break
elif (memory1 == ""):
memory_str ='N/A'
else:
memory_str=memory1
memory_str = memory_str.replace("'", "").strip()
print(memory_str)
memorys.append(memory_str)
colors= []
prod_color = p_model.split(" ")
length = len(prod_color)
indexcolor = length-3
colors.append(prod_color[indexcolor])
color1 = str(colors).replace("['",'').replace("']",'').strip()
print(color1)
p_colors.append(color1)
p_price = storage_boxes.find_element_by_css_selector('.priceWithVat > .price').text
print(p_price)
prod_prices.append(p_price)
next = driver.find_element_by_css_selector('.pagination_next a')
time.sleep(3)
next.click()
print("next page")
time.sleep(3)
kotsovolos_gr = kotsovolosmobiles()
kotsovolos_gr.kotsovolos()
解决方案
推荐阅读
- javascript - Javascript 未检测到 div 是否正确为空
- kotlin - 带有接收器的函数的 Kotlin 扩展函数
- javascript - 在 Draft-js 中,当用户光标/选择位于该实体内时,如何将类应用于实体以突出显示它?
- rest - 在 CRNK 中过滤双向关系
- c# - 如何从外部 REST Web 服务获取记录
- amazon-web-services - Amazon Web Services 缺少实例
- django - Django通过url参数过滤列表视图不起作用
- asp.net-core - 使用 MVC/端点路由重写 URL
- jinja2 - 使用 Jinja 的 Sphinx Rtd 主题覆盖页脚
- sql-server - 选择哪个返回多个值不能按预期工作