python-3.x - 如何使用 Selenium 和多个 Url 写入 csv
问题描述
import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import csv
# Each pattern finds a line containing "Maç Sonucu" (rejecting lines that
# mention Handikaplı, Yarı or Alt), then lazily scans forward to the
# ">1<", ">X<" or ">2<" cell and captures the value after the next ">":
# either "-" or a number such as 1.45.
oranev = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>1<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
oranX = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>X<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
orandep = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>2<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"

driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get("http://arsiv.mackolik.com/Mac/3495245/Boca-Juniors-Santos")
    html_source = driver.page_source
finally:
    # Close the browser even if loading the page raised.
    driver.quit()

# re.search returns None when a pattern is absent; write an empty cell in
# that case instead of crashing on .group(1).
matches = [
    re.search(pattern, html_source, re.MULTILINE)
    for pattern in (oranev, oranX, orandep)
]
row = [match.group(1) if match else '' for match in matches]
print(*row, sep='\n')

# The context manager guarantees the CSV is flushed and closed.
with open('oranlar.csv', 'w+', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Ev', 'X', 'Dep'])
    writer.writerow(row)
当我运行上面的代码时，csv 文件中得到如下内容：
| Ev | X | Dep |
|----|---|-----|
|1.45 |3.10|4.60 |
我想让这个脚本自动化：处理完第一个链接后，自动继续处理第二个链接。下面是我的另一段代码，但我不知道该如何继续实现。
import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import csv
# Pages to scrape, one match per URL.
URLs =["http://arsiv.mackolik.com/Mac/3495245/Boca-Juniors-Santos","http://arsiv.mackolik.com/Mac/3482298/Aris-Saloniki-Volos-NFC"]
# Each pattern finds a line containing "Maç Sonucu" (rejecting lines that
# mention Handikaplı, Yarı or Alt), then lazily scans forward to the
# ">1<", ">X<" or ">2<" cell and captures the value after the next ">".
oranev = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>1<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
oranX = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>X<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
orandep = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>2<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
# NOTE(review): the statements after this `for` are not indented in this
# listing, so which of them were meant to form the loop body cannot be told
# from here.
for index , url in enumerate(URLs): # Stuck here; I can't figure out how to continue
driver = webdriver.Chrome(ChromeDriverManager().install())
# NOTE(review): "url" is a string literal, so the loop variable `url` is
# never passed to the driver.
driver.get("url")
html_source = driver.page_source
driver.quit()
# NOTE(review): mode 'w+' truncates the file on every open; if this line is
# meant to run once per URL, rows written for earlier URLs would be lost.
file = open('oranlar.csv', 'w+', newline='')
writer = csv.writer(file)
writer.writerow(['Ev', 'X', 'Dep'])
# re.search returns None when a pattern does not match; .group(1) below
# would then raise AttributeError.
oranevoran = re.search(oranev, html_source, re.MULTILINE)
oranxoran = re.search(oranX, html_source, re.MULTILINE)
orandeporan = re.search(orandep, html_source, re.MULTILINE)
print(oranevoran.group(1))
print(oranxoran.group(1))
print(orandeporan.group(1))
writer.writerow([oranevoran.group(1), oranxoran.group(1), orandeporan.group(1)])
最后我想要这样的 csv 文件
| Ev | X | Dep |
|----|---|-----|
|1.45 |3.10|4.60 |
|next link number|next link number|next link number|
感谢帮助。
解决方案
类似下面的代码应该可以工作：
import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import csv
# Each pattern finds a line containing "Maç Sonucu" (rejecting lines that
# mention Handikaplı, Yarı or Alt), then lazily scans forward to the
# ">1<", ">X<" or ">2<" cell and captures the value after the next ">":
# either "-" or a number such as 1.45.
oranev = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>1<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
oranX = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>X<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"
orandep = r"^(?!.*Handikaplı).*^(?!.*Yarı).*^(?!.*Alt).*Maç Sonucu.*$[\s\S]*?>2<[\s\S]*?[\>](-|\d\d*\.\d\d|\d\d*)"

# Every URL must be its own list element; a single quoted string holding both
# addresses would reach driver.get() as one invalid URL.
URLs = [
    "http://arsiv.mackolik.com/Mac/3495245/Boca-Juniors-Santos",
    "http://arsiv.mackolik.com/Mac/3482298/Aris-Saloniki-Volos-NFC",
]

# One browser session is reused for all pages.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    # Open the CSV once, before the loop, so the header is written a single
    # time and each URL adds one row beneath it.
    with open('oranlar.csv', 'w+', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Ev', 'X', 'Dep'])
        for link in URLs:
            driver.get(link)
            html_source = driver.page_source
            matches = [
                re.search(pattern, html_source, re.MULTILINE)
                for pattern in (oranev, oranX, orandep)
            ]
            # re.search returns None when a pattern is absent; keep the row
            # aligned with its URL by writing an empty cell instead of
            # crashing on .group(1).
            row = [match.group(1) if match else '' for match in matches]
            print(*row, sep='\n')
            writer.writerow(row)
finally:
    # Close the browser even if a page load or the file write failed.
    driver.quit()
推荐阅读
- css - Bootstrap 4 行和列属性在 Safari 中的行为不正常,对 row::before 和 ::after 的修复似乎也不适合我
- java - json 键的值本身是另一个有效的 json。这个值可以解析为 String，而不是把内部 json 解析为 json 吗？
- android - 是否有任何用于原生 Android 开发的 SafeArea 小部件?
- ios - INUIAddVoiceShortcutViewController 始终以英文显示
- python-3.x - 在不使用终端命令的情况下从 BioBERT 中提取固定向量?
- sql - 如何修复基于某些条件阻止插入的触发器代码
- php - 不下载怎么看pdf?
- javascript - addEventListener 点击在轻弹滑块内被多次触发
- jquery - 为什么jQuery数据表中的复选框不起作用?
- pentaho - Pentaho 6.1 BI CE - 为什么管理部分没有身份验证按钮?