python - 如何矢量化以下函数
问题描述
def get_project_details(pids: list, conn: pymssql._pymssql.Connection) -> "list | None":
    """Fetch the given projects and return them as a list of nested dicts.

    Runs a single query against ``VW_CSMS_PMT_ProjectDetails`` for all
    requested ids, then builds one dict per returned row. Each dict embeds
    the results of ``tdp(project_id, conn)`` and
    ``get_main_task(project_id, conn)``, which are called once per row.

    Args:
        pids: Project ids to look up. Each id is converted with ``str`` and
            bound as a query parameter rather than spliced into the SQL text.
        conn: Open database connection; handed to ``pd.read_sql`` and to the
            per-row helpers.

    Returns:
        A list with one dict per project row, or ``None`` when ``pids`` is
        empty or when ``DataFrame.apply`` does not produce a ``Series``.
        NOTE(review): the non-Series case is presumably the "query returned
        no rows" case -- confirm against the pandas version in use.
    """
    project_ids = [str(pid) for pid in pids]
    if not project_ids:
        # An empty id list would render as "IN ()", which is invalid SQL.
        return None

    # One "%s" placeholder per id; the driver does the quoting/escaping, so
    # an id containing a quote can no longer break or inject into the query.
    placeholders = ", ".join(["%s"] * len(project_ids))
    query = (
        "SELECT * FROM VW_CSMS_PMT_ProjectDetails "
        f"WHERE Project_Id in ({placeholders})"
    )
    df = pd.read_sql(
        query,
        con=conn,
        params=tuple(project_ids),
        parse_dates=["StartDate", "TargetDate"],
    )

    def build_project(row):
        """Convert one result row into the nested project dict."""
        project_id = row["Project_Id"]
        return {
            "id": project_id,
            "type": "project",
            "name": row["Project_Name"],
            "start_date": get_date_string(row["StartDate"]),
            "target_date": get_date_string(row["TargetDate"]),
            "estimate": row["EST_HRS"],
            "status": row["status"],
            # NOTE(review): these helpers run once per row; get_main_task
            # issues its own query each time, so batching those lookups is
            # likely a bigger win than vectorizing this dict construction.
            "timesheet_details": tdp(project_id, conn),
            "main_task": get_main_task(project_id, conn),
        }

    out = df.apply(build_project, axis=1)
    if isinstance(out, pd.Series):
        return out.tolist()
    return None
我正在尝试让这段代码运行得更快,因为它目前处理起来非常耗时。有人建议我使用矢量化操作来提速,请问我应该如何在这里应用矢量化?
输出示例(抱歉,全部挤在一行里,但格式是 JSON):
[{"estimate": "0.00", "id": "AAM31040", "main_task": [{"estimate": "200.00", "id": "CDM01", "name": "CDM01", "start_date": "28-Oct-2020", "status": "Open", "sub_task": [{"estimate": "200.00", "id": "BDM01", "name": "BDM01", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 16.0, "actual_start_date": "06-Jan-2021", "actual_target_date": "12-Jan-2021", "approved_hours": 16.0}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM02", "name": "BDM02", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 142.0, "actual_start_date": "05-Jan-2021", "actual_target_date": "07-May-2021", "approved_hours": 136.5}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM03", "name": "BDM03", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 557.5, "actual_start_date": "04-Jan-2021", "actual_target_date": "06-May-2021", "approved_hours": 541.5}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM04", "name": "BDM04", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 20.5, "actual_start_date": "05-Jan-2021", "actual_target_date": "09-Mar-2021", "approved_hours": 20.5}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM05", "name": "BDM05", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 22.0, "actual_start_date": "04-Jan-2021", "actual_target_date": "06-Apr-2021", "approved_hours": 21.0}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM06", "name": "BDM06", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 13.5, "actual_start_date": "07-Jan-2021", "actual_target_date": "09-Feb-2021", "approved_hours": 13.5}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM07", "name": "BDM07", 
"start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 6.0, "actual_start_date": "11-Jan-2021", "actual_target_date": "09-Feb-2021", "approved_hours": 6.0}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM08", "name": "BDM08", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 1.0, "actual_start_date": "20-Jan-2021", "actual_target_date": "20-Jan-2021", "approved_hours": 1.0}, "type": "subtask"}, {"estimate": "200.00", "id": "BDM09", "name": "BDM09", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 15.5, "actual_start_date": "06-Jan-2021", "actual_target_date": "08-Feb-2021", "approved_hours": 15.5}, "type": "subtask"}, {"estimate": "200.00", "id": "CDM", "name": "CDM", "start_date": "28-Oct-2020", "status": "Open", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 4.0, "actual_start_date": "12-Jan-2021", "actual_target_date": "29-Mar-2021", "approved_hours": 4.0}, "type": "subtask"}], "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 798.0, "actual_start_date": "04-Jan-2021", "actual_target_date": "07-May-2021", "approved_hours": 775.5}, "type": "maintask"}], "name": "Digital Twin Benchmarking Data_EV and ICE Vehicles", "start_date": "28-Oct-2020", "status": "Active", "target_date": "30-Jun-2021", "timesheet_details": {"actual_hours": 798.0, "actual_start_date": "04-Jan-2021", "actual_target_date": "07-May-2021", "approved_hours": 775.5}, "type": "project"}]
主要任务:
def get_main_task(id: str, conn: pymssql._pymssql.Connection) -> "list | None":
    """Fetch the main tasks of one project and return them as nested dicts.

    Queries ``VW_CSMS_PMT_TaskDetails`` for rows of the given project whose
    ``TaskType`` is ``'Main Task'`` and builds one dict per row. Each dict
    embeds the results of ``tdmt(id, task_code, conn)`` and
    ``get_sub_task(id, task_code, conn)``, which are called once per row.

    Args:
        id: Project id to filter on. It is converted with ``str`` and bound
            as a query parameter rather than spliced into the SQL text.
        conn: Open database connection; handed to ``pd.read_sql`` and to the
            per-row helpers.

    Returns:
        A list with one dict per main-task row, or ``None`` when
        ``DataFrame.apply`` does not produce a ``Series``.
        NOTE(review): the non-Series case is presumably the "query returned
        no rows" case -- confirm against the pandas version in use.
    """
    # The id is bound via "%s" so the driver handles quoting/escaping; the
    # task type is a fixed literal and stays inline.
    query = (
        "SELECT * FROM VW_CSMS_PMT_TaskDetails "
        "WHERE ProjectID = %s AND TaskType = 'Main Task'"
    )
    df = pd.read_sql(
        query,
        con=conn,
        params=(str(id),),
        parse_dates=["StartDate", "EndDate"],
    )

    def build_main_task(row):
        """Convert one result row into the nested main-task dict."""
        task_code = row["TaskCode"]
        return {
            "id": task_code,
            "type": "maintask",
            # The task code is used for both "id" and "name".
            "name": task_code,
            "start_date": get_date_string(row["StartDate"]),
            "target_date": get_date_string(row["EndDate"]),
            "estimate": row["EstHours"],
            "status": row["Status"],
            "timesheet_details": tdmt(id, task_code, conn),
            "sub_task": get_sub_task(id, task_code, conn),
        }

    out = df.apply(build_main_task, axis=1)
    if isinstance(out, pd.Series):
        return out.tolist()
    return None
解决方案
推荐阅读
- laravel-5 - Laravel Snappy PDF 找不到文件路径
- joomla3.0 - 模块中的类别标签 (mod_articles_categories)
- python - 如何将列表中的值添加到每一行的其他列表中?
- python - 集合的复杂性
- java - 使用 Apple (Netty) 的 ServiceTalk 作为带有 Jersey 的 RESTful API 并让我们加密 HTTPS
- r - GGplot 挑战
- iis - IIS 8.5 Windows Server 2012 ASP Classic/Active-X COM DLL 写入 UNC 共享文件夹时出现问题
- php - 为什么函数 rand 在方括号中?
- php - Wordpress - 显示部分内容
- spring - JPA 存储库按函数名称查询不等于