首页 > 解决方案 > 如何使用 Selenium 和多个 Url 写入 csv

问题描述

import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import csv

# Regexes that extract one odds value each from the raw page HTML.
# With re.MULTILINE, each pattern first finds a line that contains
# "Maç Sonucu" but none of "Handikaplı", "Yarı" or "Alt", then lazily scans
# forward to the outcome label (">1<", ">X<" or ">2<") and captures the text
# right after the next ">": either "-", a number with two decimals, or an
# integer.
# NOTE(review): the excluded words presumably filter out the handicap,
# half-time and under/over markets -- confirm against the page markup.
oranev = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>1<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
oranX = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>X<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
orandep = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>2<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"

# NOTE(review): passing the driver path positionally is the Selenium 3 call
# style; newer Selenium releases may require a Service object -- confirm
# against the installed version.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get("http://arsiv.mackolik.com/Mac/3495245/Boca-Juniors-Santos")
    html_source = driver.page_source
finally:
    # Always release the browser, even if loading the page fails.
    driver.quit()

# The context manager guarantees the CSV is flushed and closed, including
# when one of the searches below finds nothing and .group(1) raises.
with open('oranlar.csv', 'w+', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Ev', 'X', 'Dep'])

    oranevoran = re.search(oranev, html_source, re.MULTILINE)
    oranxoran = re.search(oranX, html_source, re.MULTILINE)
    orandeporan = re.search(orandep, html_source, re.MULTILINE)
    print(oranevoran.group(1))
    print(oranxoran.group(1))
    print(orandeporan.group(1))
    writer.writerow([oranevoran.group(1), oranxoran.group(1), orandeporan.group(1)])

当我运行上面的代码时，csv 文件中会得到如下内容：

| Ev | X | Dep |
|----|---|-----|
|1.45 |3.10|4.60  |

我想让这个脚本自动化：处理完第一个链接后，自动继续处理第二个链接。下面是我的另一段代码，但我不知道该如何实现。

import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import csv

# Match pages to scrape; the loop below is meant to visit each one in turn.
URLs =["http://arsiv.mackolik.com/Mac/3495245/Boca-Juniors-Santos","http://arsiv.mackolik.com/Mac/3482298/Aris-Saloniki-Volos-NFC"]


# Regexes that capture the odds shown after the ">1<", ">X<" and ">2<" labels
# that follow a "Maç Sonucu" line which contains none of "Handikaplı", "Yarı"
# or "Alt". Group 1 is "-", a number with two decimals, or an integer.
oranev = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>1<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
oranX = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>X<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
orandep = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>2<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"

# NOTE(review): nothing below is indented, so this `for` has no body and the
# script does not parse as written. `index` is also never used.
for index , url in enumerate(URLs):  #Stack here. i can't figure how to continue

driver = webdriver.Chrome(ChromeDriverManager().install())
# NOTE(review): "url" is a string literal here, not the loop variable `url`,
# so the browser is asked to open the text "url" instead of a match page.
driver.get("url")

html_source = driver.page_source
driver.quit()

# NOTE(review): mode 'w+' truncates the file on open. If this ran once per
# URL, each pass would discard earlier rows and rewrite the header. The file
# is also never closed.
file = open('oranlar.csv', 'w+', newline='')
writer = csv.writer(file)
writer.writerow(['Ev', 'X', 'Dep'])



# re.search returns None when a pattern does not match, in which case the
# .group(1) calls below raise AttributeError.
oranevoran = re.search(oranev, html_source, re.MULTILINE)
oranxoran = re.search(oranX, html_source, re.MULTILINE)
orandeporan = re.search(orandep, html_source, re.MULTILINE)
print(oranevoran.group(1))
print(oranxoran.group(1))
print(orandeporan.group(1))
writer.writerow([oranevoran.group(1), oranxoran.group(1), orandeporan.group(1)])

最后我想要这样的 csv 文件

| Ev | X | Dep |
|----|---|-----|
|1.45 |3.10|4.60  |
|next link number|next link number|next link number|

感谢帮助。

标签: python-3.x, selenium, selenium-chromedriver

解决方案


类似下面这样的代码应该可以工作：

import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import csv

# Regexes that extract one odds value each from the raw page HTML.
# With re.MULTILINE, each pattern first finds a line that contains
# "Maç Sonucu" but none of "Handikaplı", "Yarı" or "Alt", then lazily scans
# forward to the outcome label (">1<", ">X<" or ">2<") and captures the text
# right after the next ">": either "-", a number with two decimals, or an
# integer.
oranev = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>1<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
oranX = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>X<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
orandep = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>2<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"


def _first_group(pattern, text):
    """Return capture group 1 of the first multiline match, or "" if none.

    Returning an empty string keeps one CSV row per URL and lets the run
    continue when a page does not contain the expected odds block.
    """
    match = re.search(pattern, text, re.MULTILINE)
    return match.group(1) if match else ""


# NOTE(review): passing the driver path positionally is the Selenium 3 call
# style; newer Selenium releases may require a Service object -- confirm
# against the installed version.
driver = webdriver.Chrome(ChromeDriverManager().install())

# Each URL must be its own list element; a misplaced quote would fuse them
# into one invalid address.
URLs = [
    "http://arsiv.mackolik.com/Mac/3495245/Boca-Juniors-Santos",
    "http://arsiv.mackolik.com/Mac/3482298/Aris-Saloniki-Volos-NFC",
]

try:
    # Open the CSV once, before the loop, so the header is written a single
    # time and every URL appends its own row. The context manager closes and
    # flushes the file even if a page load fails.
    with open('oranlar.csv', 'w+', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Ev', 'X', 'Dep'])

        # One shared browser session is reused for every page.
        for link in URLs:
            driver.get(link)
            html_source = driver.page_source

            row = [
                _first_group(pattern, html_source)
                for pattern in (oranev, oranX, orandep)
            ]
            print(*row, sep="\n")
            writer.writerow(row)
finally:
    # Always release the browser, even if scraping aborts midway.
    driver.quit()

推荐阅读