python - 为什么我的网页抓取对小数逗号前有 3 位或更多位数字的值不起作用?
问题描述
我的 Python 程序从一个网站抓取股价,但当数值高于 99 时就无法工作。我看不懂下面的错误消息。该网站是一个 HTML 页面,我可以从其中的表格里读取数值。
def loadFromWebsite(company,ISIN):
    """Print the 'Erster Schluss' price for each weekday of the past 28 days.

    Downloads the boerse.de historical-price page built from ``company`` and
    ``ISIN``, then walks backwards one calendar day at a time, starting with
    yesterday. Saturdays and Sundays are skipped. For every remaining day the
    matching table row is looked up by its 'Datum' value, formatted as
    ``'%d.%m.%y'`` (two-digit year).

    Each value is printed and stored as ``[value, date]`` in a local list;
    nothing is returned.

    Raises:
        KeyError: from ``row.loc[0, ...]`` when no table row matches the
            requested date, because the selection is then empty.
    """
    one_day = datetime.timedelta(days = 1)
    # build the page address for this company / ISIN pair
    URL = "https://www.boerse.de/historische-kurse/{0}-Aktie/{1}".format (company, ISIN)
    shareValues = []
    # a single download; the same response is reused for every day
    response = requests.get(URL)
    # the page lists values beginning with the previous day
    start = datetime.datetime.now() - one_day
    previous = start
    # 28 calendar days == 4 weeks
    for _ in range(28):
        # weekday() is 5 on Saturdays and 6 on Sundays, when the market is closed
        if previous.weekday() < 5:
            soup = BeautifulSoup(response.content, 'html.parser')
            for table in soup.find_all("table"):
                # only tables that mention the start date are inspected
                if start.strftime('%d.%m.%y') not in table.text:
                    continue
                df = pd.read_html(str(table))[0]
                wanted = previous.strftime('%d.%m.%y')
                # select the row belonging to the requested date
                row = df[df['Datum'] == wanted].reset_index()
                # first number in the cell, decimal comma turned into a dot
                value = (row.loc[0,'Erster Schluss'].split()[0]).replace(',','.')
                print(value)
                # keep the value together with its date
                shareValues.append([value, wanted])
        previous = previous - one_day
# Run the scraper for the company name "TecDax" and the ISIN "DE0007203275".
loadFromWebsite("TecDax","DE0007203275")
错误信息:
Traceback (most recent call last):
File "C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexes\range.py", line 350,
in get_loc
return self._range.index(new_key)
ValueError: 0 is not in range
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:/Users/hendr/Documents/Python_Projects/loadShareValues.py", line 99, in <module>
loadFromWebsite("TecDax","DE0007203275")
File "c:/Users/hendr/Documents/Python_Projects/loadShareValues.py", line 82, in loadFromWebsite
value = (row.loc[0,'Erster Schluss'].split()[0]).replace(',','.')
File "C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexing.py", line 1762, in __getitem__
return self._getitem_tuple(key)
File
"C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexing.py", line 1272, in _getitem_tuple
return self._getitem_lowerdim(tup)
File
"C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexing.py", line 1389, in _getitem_lowerdim
section = self._getitem_axis(key, axis=i)
File
"C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexing.py", line 1965, in _getitem_axis
return self._get_label(key, axis=axis)
File
"C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexing.py", line 625, in _get_label
return self.obj._xs(label, axis=axis)
File
"C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\generic.py", line 3537, in xs
loc = self.index.get_loc(key)
File
"C:\Users\hendr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexes\range.py", line 352,
in get_loc
raise KeyError(key)
KeyError: 0
row:
index Datum Erster Schluss Hoch Tief Tief Schluss Volumen
0 0 18.03.20 4,95 5,10 5,45 4,91 491 510 2.197.259
解决方案
问题出在日期的格式化上:表格里的日期是 '18.03.2020'(四位数年份),而代码查找的却是 '18.03.20'(两位数年份)。
因此按日期筛选得到的是一个没有任何行的空结果,随后 row.loc[0, 'Erster Schluss'] 试图读取索引 0,就抛出了 KeyError: 0。
所以只需把代码中所有出现 '%d.%m.%y' 的地方改成 '%d.%m.%Y'(%y 表示两位数年份,%Y 表示四位数年份):
def loadFromWebsite(company,ISIN):
    """Collect the 'Erster Schluss' price for each weekday of the past 28 days.

    Downloads the boerse.de historical-price page for ``company`` / ``ISIN``
    once, then walks backwards one calendar day at a time, starting with
    yesterday. Saturdays and Sundays are skipped. Days for which the page
    has no row (for example public holidays) are skipped as well instead of
    raising ``KeyError``.

    Args:
        company: Company name as it appears in the boerse.de URL.
        ISIN: ISIN of the share, used as the last URL segment.

    Returns:
        A list of ``[value, date]`` pairs, newest first. ``value`` is the
        first number of the 'Erster Schluss' cell as a string with a decimal
        point (German thousands separators removed); ``date`` is formatted
        as ``'%d.%m.%Y'``.

    Raises:
        requests.HTTPError: if the page answers with an HTTP error status.
    """
    # Local import so the block does not depend on a file-level import of io.
    import io

    date_format = '%d.%m.%Y'
    one_day = datetime.timedelta(days=1)
    # counter to load past 4 weeks (calendar days, weekends included)
    count = 28
    # generating URL to website
    URL = "https://www.boerse.de/historische-kurse/{0}-Aktie/{1}".format(company, ISIN)
    shareValues = []
    # A timeout keeps the call from hanging forever on a stalled connection;
    # raise_for_status() fails loudly instead of scraping an error page.
    response = requests.get(URL, timeout=30)
    response.raise_for_status()
    # The response never changes inside the loop, so parse it exactly once.
    soup = BeautifulSoup(response.content, 'html.parser')
    tables = soup.find_all("table")
    # table position -> DataFrame, filled lazily so each table is read once
    frames = {}
    # website lists values starting from previous day
    previous = datetime.datetime.now() - one_day
    # counting 4 weeks
    while count > 0:
        # Market is closed on saturdays (5) and sundays (6)
        if previous.weekday() < 5:
            date = previous.strftime(date_format)
            for position, table in enumerate(tables):
                # Match on the requested day itself; matching on the start
                # day finds nothing whenever "yesterday" was a weekend day.
                if date not in table.text:
                    continue
                if position not in frames:
                    # StringIO: newer pandas deprecates literal HTML strings.
                    frames[position] = pd.read_html(io.StringIO(str(table)))[0]
                df = frames[position]
                if 'Datum' not in df.columns or 'Erster Schluss' not in df.columns:
                    continue
                # get row of the requested date
                row = df[df['Datum'] == date].reset_index()
                if row.empty:
                    # date text occurs in the table but not as a 'Datum' row
                    continue
                cell = row.loc[0, 'Erster Schluss']
                if pd.isna(cell):
                    continue
                parts = str(cell).split()
                if not parts:
                    continue
                # the cell may hold several numbers; the first one is wanted
                value = parts[0]
                if ',' in value:
                    # German notation: '.' groups thousands, ',' is the
                    # decimal mark, so "3.012,45" becomes "3012.45".
                    value = value.replace('.', '').replace(',', '.')
                print(value)
                # save value and date into list
                shareValues.append([value, date])
        count = count - 1
        previous = previous - one_day
    return shareValues
# Run the scraper for the company name "TecDax" and the ISIN "DE0007203275".
loadFromWebsite("TecDax","DE0007203275")
推荐阅读
- javascript - VM299:1 未捕获的语法错误:JSON.parse 中位置 0 处的 JSON 中的意外标记 u(
) - sql - psql:关系不存在
- mongodb - MongoDB 平衡器未启动
- node.js - 嵌套异步等待与循环结合
- macos - 什么是 NEIKEv2Provider,为什么它阻止我访问互联网?
- azure-cognitive-services - 表单识别器中的不同表单模板
- azure - Azure VM 出现预配失败问题
- excel - 将鼠标悬停在 Excel 应用程序上时出现隐藏的用户窗体
- maven - 如何在 Pom.xml 中为 Firefox 和 Internet Explorer 添加 maven 依赖项?
- html - 如何减少压痕
- UILabel html属性文本中的标签?