python-3.x - python中多个页面的数据连接出错
问题描述
在连接多个页面的数据并将其导出到单个 CSV 文件中时,我遇到了错误。根据我的代码,数据最多导出到第 10 页,但在第 10 页之后它正在工作。
import urllib.request
from bs4 import BeautifulSoup
import csv
import os
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import os
url = 'http://www.igrmaharashtra.gov.in/eASR/eASRCommon.aspx?
hDistName=Buldhana'
chrome_path =
r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
d = webdriver.Chrome(executable_path=chrome_path)
d.implicitly_wait(10)
d.get(url)
Select(d.find_element_by_name('ctl00$ContentPlaceHolder5$ddlTaluka')).select_by_value('7') Select(d.find_element_by_name('ctl00$ContentPlaceHolder5$ddlVillage')).select_by_value('1464')
tableElement = d.find_element_by_id(
'ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate')
table = pd.read_html(tableElement.get_attribute('outerHTML'))[0]
#print(table)
table.columns = table.iloc[0]
table = table.iloc[1:]
#print(type(table))
table = table[table.Select == 'SurveyNo']
#print(table) #assumption SurveyNo exists for all wanted rows
surveyNo_scripts = [item.get_attribute('href') for item in
d.find_elements_by_css_selector("#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWi seRate [href*='Select$']")]
#print(surveyNo_scripts)
i = 0
for script in surveyNo_scripts:
d.execute_script(script)
surveys = d.find_element_by_css_selector('textarea').text
table.iloc[[i],table.columns.get_loc('Select')] = surveys
i += 1
print(table)
j=2
while True:
if len(d.find_elements_by_css_selector("#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate a[href*='Page${}']".format(j)))>0:
#print( d.find_elements_by_css_selector("#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate a[href*='Page${}']".format(i))[0].get_attribute('href'))
d.find_elements_by_css_selector("#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate a[href*='Page${}']".format(j))[0].click()
tableElement = d.find_element_by_css_selector(
"#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate")
table1 = pd.read_html(tableElement.get_attribute('outerHTML'))[0]
table1.columns = table1.iloc[0]
table1 = table1.iloc[1:]
#print(type(table))
table1 = table1[table1.Select == 'SurveyNo']
#print(table) #assumption SurveyNo exists for all wanted rows
surveyNo_scripts = [item.get_attribute('href') for item in
d.find_elements_by_css_selector(
"#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate
[href*='Select$']")]
#print(surveyNo_scripts)
i = 0
for script in surveyNo_scripts:
d.execute_script(script)
surveys =
d.find_element_by_css_selector('textarea').text
table1.iloc[[i],table1.columns.get_loc('Select')] =
surveys
i += 1
#print(table1)
#table = table.append(table1.reindex(columns=table.columns))
table1.columns = table.columns
table = pd.concat([table, table1] ,ignore_index=True)
print(table)
j+=1
else:
break
table.to_csv(r"C:\Users\Guest\Desktop\Sample_buldhana.csv", sep=',', encoding='utf-8-sig',index = False )
解决方案
推荐阅读
- javascript - Select random entry from string list in javascript
- python - 如何在 Google Core IoT 中进行配置?
- regex - Regex for getting content of a html property when another specific property doesn't exist
- php - 使用 axios 将表单数据发布到 php 脚本
- django - 502 Bad gateway nginx with gunicorn & django
- postgresql - 从单个要素图层中选择相交面
- spring-boot - JPA Query works only native
- python - Python:at 符号 (@) 之间的单词
- javascript - 是否可以使用 Javascript 复制到剪贴板?
- react-native - React native : can't unzip the file I get with rn-fetch-blob