首页 > 解决方案 > Selenium Python,通过网站解析,打开新标签,然后抓取?

问题描述

"""Scrape the first club's details from the AYBU events site into AYBU.csv.

Opens the club list page, clicks the first club card, scrolls the detail
page so its content renders, extracts the club's name, contact links,
advisor, president, member count and event count, and writes one CSV row
per extracted value list via pandas.
"""

import csv  # kept from the original file; CSV output is done via pandas below
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # kept from the original file

# Hard-coded paths/URLs hoisted to named constants so they are easy to change.
CHROMEDRIVER_PATH = "C:/Users/EMRE/Desktop/chromedriver_win32/chromedriver.exe"
BASE_URL = "http://event.ybu.edu.tr/kulupler/"
OUTPUT_CSV = "AYBU.csv"

# Common prefix of the detail-page info table rows (name, advisor, president, ...).
_TABLE = "/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody"


def _texts(driver, by, locator):
    """Return the visible text of every element matching (by, locator)."""
    return [element.text for element in driver.find_elements(by, locator)]


def scrape():
    """Scrape the first club's detail page; return a column-name -> values dict.

    Returns:
        dict[str, list[str]]: one key per output CSV column, in column order.
    """
    browser = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        browser.get(BASE_URL)

        # Open the first club's detail page (XPath targets the first card).
        browser.find_element(
            By.XPATH, "/html/body/div[2]/div[2]/div[1]/a/div/div[1]/div"
        ).click()
        time.sleep(1)

        # Scroll to the bottom so lazily rendered content loads before reading.
        browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
        time.sleep(1)

        # Column order here determines the CSV column order.
        data = {
            "kulupList": _texts(browser, By.XPATH, _TABLE + "/tr[1]/td[2]"),
            "MailList": _texts(
                browser, By.CSS_SELECTOR, "#bilgiler > a.btn.bg-orange.btn-social"
            ),
            "FacebookList": _texts(
                browser, By.CSS_SELECTOR, "#bilgiler > a.btn.bg-blue.btn-social"
            ),
            "TwitterList": _texts(
                browser, By.CSS_SELECTOR, "#bilgiler > a.btn.btn-social.bg-aqua"
            ),
            "InstagramList": _texts(
                browser, By.CSS_SELECTOR, "#bilgiler > a.btn.btn-social.bg-light-blue"
            ),
            "DanismanList": _texts(browser, By.XPATH, _TABLE + "/tr[2]/td[2]"),
            "BaskanList": _texts(browser, By.XPATH, _TABLE + "/tr[3]/td[2]"),
            "UyeList": _texts(browser, By.XPATH, _TABLE + "/tr[4]/td[2]"),
            "EtkinlikList": _texts(browser, By.XPATH, _TABLE + "/tr[5]/td[2]"),
        }
        time.sleep(2)
        return data
    finally:
        # Always release the Chrome process, even if scraping raised.
        browser.quit()


if __name__ == "__main__":
    liste = scrape()
    df = pd.DataFrame(data=liste)
    df.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")




标签: selenium, web-scraping

解决方案


推荐阅读