python - float() 参数必须是字符串或数字,而不是 'Timestamp':python pandas
问题描述
我有以下命令:
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
# Question script, kept exactly as posted: it builds per-date TMAX/TMIN
# extremes for a baseline window, looks for later readings that beat them,
# and plots the result. The review notes below point at likely problems.

# Load the raw observations (the sample in the post shows the columns
# ID, Date, Element, Data_Value).
data = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv')
# Keep the baseline window by comparing the Date strings.
# NOTE(review): the upper bound is 2014-12-12, not 2014-12-31 -- confirm.
newdata = data[(data['Date'] >= '2005-01-01') & (data['Date'] <= '2014-12-12')]
# Split the baseline into daily-maximum and daily-minimum readings.
datamax = newdata[newdata['Element']=='TMAX']
datamin = newdata[newdata['Element']=='TMIN']
# Column-wise max/min per calendar date (one row per distinct Date value,
# not per day of the year).
datamax = datamax.groupby('Date').max()
datamin = datamin.groupby('Date').min()
# Turn the Date group key back into a regular column.
datamax = datamax.reset_index()
datamin = datamin.reset_index()
# Drop rows whose Date string contains "02-29" (leap days).
datamax = datamax[~datamax['Date'].str.contains("02-29")]
datamin = datamin[~datamin['Date'].str.contains("02-29")]
# Same pipeline for everything after 2014-12-31.
breakoutdata = data[(data['Date'] > '2014-12-31')]
datamax2015 = breakoutdata[breakoutdata['Element']=='TMAX']
datamin2015 = breakoutdata[breakoutdata['Element']=='TMIN']
datamax2015 = datamax2015.groupby('Date').max()
datamin2015 = datamin2015.groupby('Date').min()
datamax2015 = datamax2015.reset_index()
datamin2015 = datamin2015.reset_index()
datamax2015 = datamax2015[~datamax2015['Date'].str.contains("02-29")]
datamin2015 = datamin2015[~datamin2015['Date'].str.contains("02-29")]
# Convert Date to datetime and derive the day-of-year number.
datamin['Date'] = pd.to_datetime(datamin['Date'])
datamin2015['Date'] = pd.to_datetime(datamin2015['Date'])
datamin["day_of_year"] = datamin["Date"].dt.dayofyear
# NOTE(review): the right-hand side reads datamin, not datamin2015, so the
# values are taken from the baseline frame and aligned on index labels --
# presumably datamin2015["Date"] was intended.
datamin2015["day_of_year"] = datamin["Date"].dt.dayofyear
# NOTE(review): DataFrame.join(on=...) matches the caller's column against
# the *index* of the other frame, not against its day_of_year column --
# verify this pairs the intended rows.
dataminappend = datamin2015.join(datamin,on="day_of_year",how="left",lsuffix="2015")
# Rows where the later value is below the joined baseline value.
lower = dataminappend.loc[dataminappend["Data_Value2015"]<dataminappend["Data_Value"]]
# Same steps for the maxima.
datamax['Date'] = pd.to_datetime(datamax['Date'])
datamax2015['Date'] = pd.to_datetime(datamax2015['Date'])
datamax["day_of_year"] = datamax["Date"].dt.dayofyear
# NOTE(review): same as above -- reads datamax rather than datamax2015.
datamax2015["day_of_year"] = datamax["Date"].dt.dayofyear
datamaxappend = datamax2015.join(datamax,on="day_of_year",how="left",lsuffix="2015")
# Rows where the later value is above the joined baseline value.
upper = datamaxappend.loc[datamaxappend["Data_Value2015"]>datamaxappend["Data_Value"]]
# Strip the "2015" suffix again.
# NOTE(review): the joined frames presumably still carry unsuffixed
# Date/Data_Value columns from the right-hand side, so this rename may
# produce duplicate column names -- verify.
upper = upper.rename(columns={"Date2015":"Date","Data_Value2015":"Data_Value"})
lower = lower.rename(columns={"Date2015":"Date","Data_Value2015":"Data_Value"})
# Replace the Date values with formatted strings.
upper['Date'] = upper['Date'].map(lambda x: x.strftime("%d/%m/%Y, %H:%M:%S"))
lower['Date'] = lower['Date'].map(lambda x: x.strftime("%d/%m/%Y, %H:%M:%S"))
ax = plt.gca()
# NOTE(review): both line plots draw datamax; the second (blue) one
# presumably should draw datamin.
plt.plot(datamax['Date'],datamax['Data_Value'],color='red')
plt.plot(datamax['Date'],datamax['Data_Value'], color='blue')
# The post reports the TypeError on the next line. The x values here are the
# strings produced by strftime above, while the line plots above were given
# datetime values -- presumably mixing the two on one axis is the trigger.
plt.scatter(upper['Date'],upper['Data_Value'],color='red')
plt.scatter(lower['Date'],lower['Data_Value'], color='blue')
plt.ylabel("Temperature (degrees C)",color='navy')
plt.xlabel("Date",color='navy',labelpad=15)
plt.title('Record high and low temperatures by day (2005-2014)', alpha=1.0,color='brown',y=1.08)
# Place the legend below the axes.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.35),
fancybox=False,labels=['Record high','Record low'])
plt.xticks(rotation=30)
# NOTE(review): the shaded band uses positions 0..n-1 as x instead of the
# Date values used by the plots above, and takes datamin with the same
# length as datamax for granted.
plt.fill_between(range(len(datamax['Date'])), datamax['Data_Value'], datamin['Data_Value'],color='yellow',alpha=0.8)
plt.show()
但是,我不断收到错误消息:TypeError: float() argument must be a string or a number, not 'Timestamp'
对于这一行:plt.scatter(upper['Date'],upper['Data_Value'],color='red')
有人能帮我一把吗?
我看不出我的代码有任何明显错误......
这是数据框数据的片段:
ID Date Element Data_Value
0 USW00094889 2014-11-12 TMAX 22
1 USC00208972 2009-04-29 TMIN 56
2 USC00200032 2008-05-26 TMAX 278
3 USC00205563 2005-11-11 TMAX 139
4 USC00200230 2014-02-27 TMAX -106
5 USW00014833 2010-10-01 TMAX 194
6 USC00207308 2010-06-29 TMIN 144
7 USC00203712 2005-10-04 TMAX 289
8 USW00004848 2007-12-14 TMIN -16
9 USC00200220 2011-04-21 TMAX 72
10 USC00205822 2013-01-16 TMAX 11
11 USC00205822 2008-05-29 TMIN 28
12 USC00203712 2008-10-17 TMIN 17
13 USC00205563 2006-05-14 TMAX 183
14 USC00200842 2006-05-14 TMAX 122
15 USC00205563 2014-12-07 TMAX 67
16 USC00205822 2008-09-07 TMAX 250
17 USC00205450 2006-04-22 TMIN 67
18 USC00203712 2008-02-22 TMAX -44
19 USC00205563 2015-01-03 TMIN -39
20 USC00201250 2011-03-28 TMIN -78
21 USC00200230 2008-02-10 TMAX -6
22 USC00207320 2008-02-03 TMIN -39
[165085 rows x 4 columns]
如果有人知道更简单的方法来实现上面的代码,也欢迎指出。
解决方案
- Python 代码,绘制 2005-2014 年期间按一年中的每一天统计的最高气温纪录和最低气温纪录的折线图。每天的最高纪录和最低纪录之间的区域应加阴影。
- 然后,叠加 2015 年数据的散点图,标出 2015 年中打破这十年(2005-2014 年)最高纪录或最低纪录的所有点。
- 删除闰年日期(即 2 月 29 日)。
/
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
# Show complete frames when they are printed while exploring the data.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

DATA_PATH = 'data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'


def record_by_day(frame, element, how):
    """Return the extreme ``Data_Value`` per day of the year for one element.

    Leap days (Feb 29) are discarded, and dates after February in a leap
    year are shifted back by one so that every calendar day maps to the same
    number (1..365) in every year. Plain ``dt.dayofyear`` would put Feb 29
    and Mar 1 into the same bucket and create a day 366.

    Args:
        frame: DataFrame with 'Date' (ISO date strings or datetimes),
            'Element' and 'Data_Value' columns.
        element: Value of 'Element' to keep, e.g. 'TMAX' or 'TMIN'.
        how: Aggregation applied per day, 'max' or 'min'.

    Returns:
        DataFrame with the columns 'day_of_year' and 'Data_Value', sorted by
        'day_of_year'. The ID/Date/Element columns are dropped because their
        aggregated values would carry no meaning.
    """
    rows = frame.loc[frame['Element'] == element, ['Date', 'Data_Value']]
    dates = pd.to_datetime(rows['Date'])

    # Remove Feb 29 before aggregating; afterwards the dates are gone.
    leap_day = (dates.dt.month == 2) & (dates.dt.day == 29)
    rows = rows[~leap_day]
    dates = dates[~leap_day]

    # In leap years every day after February is numbered one too high.
    after_feb_in_leap_year = dates.dt.is_leap_year & (dates.dt.month > 2)
    day_of_year = dates.dt.dayofyear - after_feb_in_leap_year.astype(int)
    day_of_year = day_of_year.rename('day_of_year')

    return rows['Data_Value'].groupby(day_of_year).agg(how).reset_index()


def record_breakers(current, record, how):
    """Return the rows of *current* that beat *record* on the same day.

    Args:
        current: Output of ``record_by_day`` for the year under test.
        record: Output of ``record_by_day`` for the baseline period.
        how: 'max' keeps days where *current* is above the record,
            'min' keeps days where it is below.

    Returns:
        DataFrame with 'day_of_year' and 'Data_Value' (the value from
        *current*) for the record-breaking days only.

    Raises:
        ValueError: If *how* is neither 'max' nor 'min'.
    """
    # Match on the day_of_year column itself; DataFrame.join(on=...) would
    # compare against the other frame's index and pair the wrong days.
    merged = current.merge(record, on='day_of_year', suffixes=('', '_record'))
    if how == 'max':
        broken = merged['Data_Value'] > merged['Data_Value_record']
    elif how == 'min':
        broken = merged['Data_Value'] < merged['Data_Value_record']
    else:
        raise ValueError("how must be 'max' or 'min', got %r" % (how,))
    return merged.loc[broken, ['day_of_year', 'Data_Value']].reset_index(drop=True)


# Guarded so that importing this file does not read the CSV or open a plot;
# running it as a script (or in a notebook cell) behaves as before.
if __name__ == "__main__":
    data = pd.read_csv(DATA_PATH)

    # The Date strings compare correctly as text, assuming the YYYY-MM-DD
    # format shown in the sample. The baseline covers all of 2005-2014.
    newdata = data[(data['Date'] >= '2005-01-01') & (data['Date'] <= '2014-12-31')]
    breakoutdata = data[(data['Date'] > '2014-12-31')]

    datamax = record_by_day(newdata, 'TMAX', 'max')
    datamin = record_by_day(newdata, 'TMIN', 'min')
    datamax2015 = record_by_day(breakoutdata, 'TMAX', 'max')
    datamin2015 = record_by_day(breakoutdata, 'TMIN', 'min')

    upper = record_breakers(datamax2015, datamax, 'max')
    lower = record_breakers(datamin2015, datamin, 'min')

    ax = plt.gca()
    # Plain integers on the x axis avoid mixing Timestamps and strings.
    plt.plot(datamax['day_of_year'], datamax['Data_Value'], color='red')
    plt.plot(datamin['day_of_year'], datamin['Data_Value'], color='blue')
    plt.scatter(upper['day_of_year'], upper['Data_Value'], color='purple')
    plt.scatter(lower['day_of_year'], lower['Data_Value'], color='cyan')
    # NOTE(review): sample values such as 278 for TMAX suggest the data is in
    # tenths of a degree -- confirm against the dataset description.
    plt.ylabel("Temperature (degrees C)", color='navy')
    plt.xlabel("Day of the year", color='navy', labelpad=15)
    plt.title('Record high and low temperatures by day (2005-2014)', alpha=1.0, color='brown', y=1.08)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.35), fancybox=False, labels=['Record high', 'Record low'])
    plt.xticks(rotation=30)

    # Shade between the records at the same x positions as the lines, and
    # only for days present in both series.
    band = datamax.merge(datamin, on='day_of_year', suffixes=('_max', '_min'))
    plt.fill_between(band['day_of_year'], band['Data_Value_max'], band['Data_Value_min'], color='yellow', alpha=0.8)
    plt.show()
/
我已使用 datamin['Date'] = datamin['Date'].dt.strftime('%Y-%m-%d') 将“日期”列转换为字符串。
然后我使用 upper['Date'] = pd.to_datetime(upper['Date']) 将其转换回 datetime 格式。
最后,我使用 day_of_year(一年中的第几天)而不是日期作为 x 值。
推荐阅读
- reactjs - useEffect 进入无限循环
- c - 在 C 中无效的用户输入应该返回哪个 errno?
- sql - 在 rails 中保存来自“SELECT name AS example”的信息
- html - 侧边栏上的链接仅显示在文本边缘
- python - 加载资源失败:net::ERR_PROXY_CONNECTION_FAILED',Selenium+python+browsermobproxy 出错
- python - 打印语句的未知行为
- c++ - 阵列旋转
- linux - Bash 脚本无法从变量中正确读取路径
- python - 如何使用具有 blit=True 的 FuncAnimation 为 Poly3DCollection 设置动画?
- python - 为什么 Map 工作但 Apply 引发 ValueError