selenium - 如何从新标签页中抓取数据?
问题描述
# Scrape one club page of event.ybu.edu.tr and write the collected fields to AYBU.csv.
# `browser` (a Selenium WebDriver), `time` and `pd` (pandas) are expected to be
# set up earlier in the script.

# Load the club listing page in the first tab.
browser.get("http://event.ybu.edu.tr/kulupler/") #main url
time.sleep(1)
# Open the club detail page in a second tab.
# NOTE: window.open() does not move the WebDriver's focus. Every find_elements_*
# call below still runs against the first tab until browser.switch_to.window(...)
# is called -- this is the problem the question is about.
browser.execute_script("window.open('http://event.ybu.edu.tr/kulup/afak', 'new window')") #open new tab
for i in range(1):
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')  # scroll the page down to the bottom
    time.sleep(1)

# Club name: row 1 of the info table.
kulupnames = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]")
kulupList=[]
for kulupname in kulupnames:
    kulupList.append(kulupname.text)

# Contact buttons inside #bilgiler, told apart by their colour class.
mails = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-orange.btn-social")
MailList=[]
for mail in mails:
    MailList.append(mail.text)

FacebookAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-blue.btn-social")
FacebookList=[]
for FacebookAdress in FacebookAdresses:
    FacebookList.append(FacebookAdress.text)

TwitterAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-aqua")
TwitterList=[]
for TwitterAdress in TwitterAdresses:
    TwitterList.append(TwitterAdress.text)

InstagramAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-light-blue")
InstagramList=[]
for InstagramAdress in InstagramAdresses:
    InstagramList.append(InstagramAdress.text)

# Rows 2-5 of the same info table (advisor, president, member count, event count,
# going by the variable names).
AkademikDanismanlar = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[2]/td[2]")
DanismanList=[]
for AkademikDanisman in AkademikDanismanlar:
    DanismanList.append(AkademikDanisman.text)

KulupBaskanlari = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[3]/td[2]")
BaskanList=[]
for KulupBaskani in KulupBaskanlari:
    BaskanList.append(KulupBaskani.text)

ToplamUyeler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[4]/td[2]")
UyeList=[]
for Uye in ToplamUyeler:
    UyeList.append(Uye.text)

Etkinlikler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[5]/td[2]")
EtkinlikList=[]
for Etkinlik in Etkinlikler:
    EtkinlikList.append(Etkinlik.text)

time.sleep(2)
# All element texts have been copied into plain lists, so the browser can go.
browser.quit()

# Earlier attempt with the csv module, left disabled.
#DataFile = csv.writer(open('AYBU.csv','w'))
#DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdresi','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
#DataFile.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
liste = {'kulupList':kulupList,'MailList':MailList,'FacebookList':FacebookList,'TwitterList':TwitterList,'InstagramList':InstagramList,'DanismanList':DanismanList,'BaskanList':BaskanList,'UyeList':UyeList,'EtkinlikList':EtkinlikList}
# Wrap every list in a Series so that columns of different lengths (e.g. a club
# without a Twitter link) are padded with NaN instead of making the DataFrame
# constructor raise "All arrays must be of the same length".
df = pd.DataFrame({column: pd.Series(values, dtype=object) for column, values in liste.items()})
df.to_csv("AYBU.csv", index=False, encoding='utf-8-sig')
解决方案
一般流程:
# General pattern for working in a second tab with Selenium.
browser.get("http://event.ybu.edu.tr/kulupler/") #main url
# Open a new, blank tab; the driver's focus stays on the first tab
browser.execute_script("window.open('');")
# Switch the driver to the new tab (the second window handle) and load the page there
browser.switch_to.window(browser.window_handles[1])
browser.get('http://event.ybu.edu.tr/kulup/afak')
# ... scrape the club page here, while the driver is focused on the new tab ...
# close the active tab
browser.close()
time.sleep(3)
# Switch back to the first tab
browser.switch_to.window(browser.window_handles[0])
推荐阅读
- python - ValueError:导入sklearn时源代码字符串不能包含空字节
- python - GluonCV - 在目标检测中使用 GPU 进行推理
- ios - iOS 经常得到“10.00 秒用尽的真实(挂钟)时间余量”
- javascript - 单击菜单项时需要帮助关闭侧边栏/拉导航菜单
- machine-learning - 保存/重用基于 doc2vec 的模型以进行进一步预测
- node.js - 在ExpressJS自己的文件中提取方法:不能在模块外使用import语句
- angularjs - 如何等待我的服务功能在 AngularJS 中完成?
- android - 使用 Firebase 测试实验室为 Android 库执行检测测试?
- ruby - 如何使用 dig 方法获取嵌套属性
- r - 用 R dplyr 有条件地重新编码/替换变量?