python - 为什么 pandas 坚持将我的对象列转换为日期时间?
问题描述
我从一个只有字符串字段的 IBM DB2 数据源中提取数据。其中一些字段包含时间戳,但它们是字符串数据类型,因为有像“0001-01-01 00:00:00”这样的时间戳。我检查了 df.dtypes,一切都是对象。当我尝试使用 sqlalchemy 引擎通过 pandas.to_sql() 将数据帧写入 microsoft sql 表(我删除表并让 sqlalchemy 从数据帧中创建全新的数据帧)时,我收到一个错误,看起来好像 pandas 正在尝试将时间戳字段转换为日期时间,而不是将它们保留为字符串/对象。我错过了什么?
数据框:
BIL_DIV_NO object
CAS_UPC_NO object
CAT_ID object
CAT_PPS_ID object
CAT_ITM_NO object
SRC_ID object
UNT_FTR_AM object
UNT_PRC_SIZ_CD object
CAT_ADD_DT object
CAT_END_DT object
CON_UPC_NO object
UNT_OVD_CD object
UNT_LBL_QY object
SPS_RTL_FL object
ROW_UPD_TS object
DIV_CAS_STU_CD object
CAT_PPS_001_ID object
CAT_ITM_001_NO object
ROW_UPD_001_TS object
MAX_ORD_ITM_QY object
SKU_NO object
FAM_DPT_CD object
FAM_CLS_CD object
FAM_SBC_CD object
TSK_ID object
CRE_ID object
LST_UPD_ID object
LIS_CST_AM object
DIV_ACT_NO object
LIN_NO object
DIV_COM_CD object
DIV_DPT_CD object
DIV_INF_CD object
PRC_BOK_CD object
QPS_SIZ_TX object
QPS_DSC_TX object
ALT_DPT_CD object
QPS_SCN_CD object
RCM_FL object
ORL_DIV_COM_CD object
ORL_DIV_DPT_CD object
PDT_ANA_CD object
SCH_CGY_CD object
CLB_PAK_FL object
CMR_CD object
CRV_AM object
RLN_TYP_CD object
RLT_CON_UPC_NO object
DEP_LNK_CD object
SCH_CLS_CD object
MRC_FL object
dtype: object
编码:
import pandas as pd
import pyodbc as db
import urllib
import datetime
import time
import math
import sqlalchemy as sa
whs_sql = "SELECT DISTINCT SRC_ID FROM {}.{} FOR FETCH ONLY WITH UR;".format(db2_schema, table_name)
db2_conn = db.connect(
driver=db2_driver,
hostname=db2_server,
database=db2_database,
uid=db2_username,
pwd=db2_password,
protocol=db2_protocol,
port=db2_port,
autocommit=True
)
whs_nos = pd.read_sql(whs_sql, db2_conn)['SRC_ID'].values.tolist()
db2_conn.close()
engine = sa.create_engine("mssql+pyodbc:///?odbc_connect=%s" % sca_params, fast_executemany=True)
engine.execution_options(autocommit=True).execute("DROP TABLE {}.{}.{}".format(sca_sql_database, sca_sql_schema, table_name))
engine.dispose()
for whs_no in whs_nos:
db2_sql = """
SELECT
*
FROM {}.{} WHERE SRC_ID = '{}'
FOR FETCH ONLY
WITH UR;
""".format(db2_schema, table_name, whs_no)
db2_conn = db.connect(
driver=db2_driver,
hostname=db2_server,
database=db2_database,
uid=db2_username,
pwd=db2_password,
protocol=db2_protocol,
port=db2_port,
autocommit=True
)
df = pd.read_sql(db2_sql, db2_conn)
db2_conn.close()
engine = sa.create_engine("mssql+pyodbc:///?odbc_connect=%s" % sca_params, fast_executemany=True)
df.to_sql(table_name, engine, if_exists='append', index=False, schema=sca_sql_schema)
engine.dispose()
错误:
Traceback (most recent call last):
File "C:\Program Files\Python37\lib\site-packages\pandas\core\arrays\datetimes.py", line 1858, in objects_to_datetime64ns
values, tz_parsed = conversion.datetime_to_datetime64(data)
File "pandas\_libs\tslibs\conversion.pyx", line 198, in pandas._libs.tslibs.conversion.datetime_to_datetime64
File "pandas\_libs\tslibs\np_datetime.pyx", line 117, in pandas._libs.tslibs.np_datetime.check_dts_bounds
pandas._libs.tslibs.np_datetime.OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 1-01-01 00:00:00
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "E:/Python_Scripts/EXT_TBL_DMPS/PIDORDEN.py", line 138, in <module>
df.to_sql(table_name, engine, if_exists='append', index=False, schema=sca_sql_schema)
File "C:\Program Files\Python37\lib\site-packages\pandas\core\generic.py", line 2663, in to_sql
method=method,
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 521, in to_sql
method=method,
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 1314, in to_sql
dtype=dtype,
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 622, in __init__
self.table = self._create_table_setup()
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 868, in _create_table_setup
column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type)
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 860, in _get_column_names_and_types
for i in range(len(self.frame.columns))
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 860, in <listcomp>
for i in range(len(self.frame.columns))
File "C:\Program Files\Python37\lib\site-packages\pandas\io\sql.py", line 970, in _sqlalchemy_type
if col.dt.tz is not None:
File "C:\Program Files\Python37\lib\site-packages\pandas\core\accessor.py", line 85, in _getter
return self._delegate_property_get(name)
File "C:\Program Files\Python37\lib\site-packages\pandas\core\indexes\accessors.py", line 62, in _delegate_property_get
values = self._get_values()
File "C:\Program Files\Python37\lib\site-packages\pandas\core\indexes\accessors.py", line 53, in _get_values
return DatetimeIndex(data, copy=False, name=self.name)
File "C:\Program Files\Python37\lib\site-packages\pandas\core\indexes\datetimes.py", line 253, in __new__
ambiguous=ambiguous,
File "C:\Program Files\Python37\lib\site-packages\pandas\core\arrays\datetimes.py", line 320, in _from_sequence
ambiguous=ambiguous,
File "C:\Program Files\Python37\lib\site-packages\pandas\core\arrays\datetimes.py", line 1755, in sequence_to_dt64ns
data, dayfirst=dayfirst, yearfirst=yearfirst
File "C:\Program Files\Python37\lib\site-packages\pandas\core\arrays\datetimes.py", line 1863, in objects_to_datetime64ns
raise e
File "C:\Program Files\Python37\lib\site-packages\pandas\core\arrays\datetimes.py", line 1854, in objects_to_datetime64ns
require_iso8601=require_iso8601,
File "pandas\_libs\tslib.pyx", line 481, in pandas._libs.tslib.array_to_datetime
File "pandas\_libs\tslib.pyx", line 698, in pandas._libs.tslib.array_to_datetime
File "pandas\_libs\tslib.pyx", line 694, in pandas._libs.tslib.array_to_datetime
File "pandas\_libs\tslib.pyx", line 566, in pandas._libs.tslib.array_to_datetime
File "pandas\_libs\tslibs\np_datetime.pyx", line 117, in pandas._libs.tslibs.np_datetime.check_dts_bounds
pandas._libs.tslibs.np_datetime.OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 1-01-01 00:00:00
解决方案
是的,非常感谢 TiTo 的评论。答案是在 df.to_sql() 中设置 dtype 选项天哪,我觉得很愚蠢。哈哈。谢谢!!
df.to_sql(table_name,
engine,
if_exists='append',
index=False,
schema=sca_sql_schema,
dtype={'CAT_ADD_DT': sa.String,
'CAT_END_DT': sa.String,
'ROW_UPD_TS': sa.String,
'ROW_UPD_001_TS': sa.String})
推荐阅读
- libreoffice - 在 LibreOffice Calc 中,通过单击更改形状的大小
- c++ - 如何在ffmpeg(c ++)中获取视频流的格式
- c++ - How to return nullptr equivalent from a C++ function whose return type is pair?
- angular - Refresh the popup input text in Angular 9
- java - Spring - How to upload image from folder to database
- oracle - Connect to a MS Access database with SQL Developer
- react-native - error navigate with react-native-open-maps
- javascript - How to mock functions for testing using the React Testing Library + Jest
- php - sending verification email via SMTP using gmail SMTP server failed in codeigniter
- angular - On hover change the background color of item in ng-select