pandas - 如何根据列表头中的 TH 和 PR 组合这些列表
问题描述
import pandas as pd, win32com.client as win32, fire
from colorama import Fore, init
from os.path import abspath
from timeit import default_timer as timer
def _text_cells(column):
    """Return the string-valued cells of *column*, skipping its first two rows."""
    # Non-string cells are dropped by the isinstance filter, exactly as the
    # original per-index type check did.
    return [cell for cell in column.iloc[2:] if isinstance(cell, str)]


def res_loop(sample="302060",
             from_seat="302061",
             to_seat="302065",
             export_file_name="excelfile"):
    """Fetch one marksheet page and print the text found in its header columns.

    The page URL is built from ``sample``; its first two characters select
    the folder part of the URL. The second table returned by
    ``pd.read_html`` is used, and for each of its columns 0, 1 and 2 the
    string cells from row 2 onwards are collected and printed.

    Args:
        sample: Seat number whose marksheet is downloaded.
        from_seat: Not used by the code visible in this snippet.
        to_seat: Not used by the code visible in this snippet.
        export_file_name: Not used by the code visible in this snippet.

    Returns:
        None. The three lists are written to stdout.
    """
    f_no = str(sample)[0:2]
    tables = pd.read_html(f"https://msbte.org.in/DISRESLIVE2021CRSLDSEP/COV6139QS21LIVEResult/SeatNumber/{f_no}/{sample}Marksheet.html")
    marks = tables[1]

    # Column 0: presumably the subject names (per the variable name) -- confirm.
    sub_name = _text_cells(marks[0])
    print(sub_name)

    # Column 1: presumably the TH/PR markers (per the variable name) -- confirm.
    thph = _text_cells(marks[1])
    print(thph)

    # Column 2: presumably the exam heads such as ESE/PA -- confirm.
    heads = _text_cells(marks[2])
    print(heads)
需要帮助构建用于存储此数据的列标题。它给出如下所示的输出
sub_name=['MAN', 'PWP','MAD', 'ETI', 'NIS', 'EDP', 'CAP']
head=['TH', 'TH', 'PR', 'TH', 'PR', 'TH', 'TH', 'PR', 'PR', 'PR']
exam=['ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA', 'ESE', 'PA']
预期输出:
h1=['MAN-TH-ESE','MAN-TH-PA','PWP-TH-ESE','PWP-TH-PA','PWP-PR-ESE','PWP-PR-PA','MAD-TH-ESE','MAD-TH-PA','MAD-PR-ESE','MAD-PR-PA','ETI-TH-ESE','ETI-TH-PA','NIS-TH-ESE','NIS-TH-PA','NIS-PR-ESE','NIS-PR-PA','EDP-PR-ESE','EDP-PR-PA','CAP-PR-ESE','CAP-PR-PA']
我已经为其中一个分支手动完成了这项工作,但如果数据发生变化,它就不起作用了。
解决方案
看起来您可以在这些列上使用 .ffill()(前向填充),然后将它们连接为列表:
import pandas as pd, win32com.client as win32, fire
from colorama import Fore, init
from os.path import abspath
from timeit import default_timer as timer
def res_loop(sample="302060",
             from_seat="302061",
             to_seat="302065",
             export_file_name="excelfile"):
    """Fetch one marksheet page and print combined column headers.

    The second table returned by ``pd.read_html`` is taken, columns 0 and 1
    are forward-filled so every row carries a value in both, and then, for
    rows 2 onwards, columns 0, 1 and 2 are joined with ``'-'`` into one
    string per row.

    Args:
        sample: Seat number whose marksheet is downloaded; its first two
            characters select the folder part of the URL.
        from_seat: Not used by the code visible in this snippet.
        to_seat: Not used by the code visible in this snippet.
        export_file_name: Not used by the code visible in this snippet.

    Returns:
        None. The list of combined headers is written to stdout.
    """
    f_no = str(sample)[0:2]
    # Build the URL once and reuse it instead of repeating the literal.
    url = f"https://msbte.org.in/DISRESLIVE2021CRSLDSEP/COV6139QS21LIVEResult/SeatNumber/{f_no}/{sample}Marksheet.html"
    # NOTE(review): the sample output contains 'â\x80\x93', which looks like a
    # UTF-8 dash decoded with the wrong codec; passing encoding="utf-8" to
    # read_html may fix it -- confirm against the live page.
    tables = pd.read_html(url)
    df_sub = tables[1]

    # Empty cells in columns 0 and 1 inherit the nearest value above them.
    for col in (0, 1):
        df_sub[col] = df_sub[col].ffill()

    # Skip the first two rows, then join the three columns element-wise.
    h1 = list(df_sub.iloc[2:, 0] + '-' + df_sub.iloc[2:, 1] + '-' + df_sub.iloc[2:, 2])
    print(h1)
输出:
['MANAGEMENT-TH-ESE', 'MANAGEMENT-TH-PA', 'PROGRAMMING WITH PYTHON-TH-ESE', 'PROGRAMMING WITH PYTHON-TH-PA', 'PROGRAMMING WITH PYTHON-PR-ESE', 'PROGRAMMING WITH PYTHON-PR-PA', 'MOBILE APPLICATION DEVELOPMENT-TH-ESE', 'MOBILE APPLICATION DEVELOPMENT-TH-PA', 'MOBILE APPLICATION DEVELOPMENT-PR-ESE', 'MOBILE APPLICATION DEVELOPMENT-PR-PA', 'EMERGING TRENDS IN COMPUTER AND INFORMATION TECHNOLGY-TH-ESE', 'EMERGING TRENDS IN COMPUTER AND INFORMATION TECHNOLGY-TH-PA', 'NETWORK AND INFORMATION SECURITY-TH-ESE', 'NETWORK AND INFORMATION SECURITY-TH-PA', 'NETWORK AND INFORMATION SECURITY-PR-ESE', 'NETWORK AND INFORMATION SECURITY-PR-PA', 'ENTERPRENURESHIP DEVELOPMENT-PR-ESE', 'ENTERPRENURESHIP DEVELOPMENT-PR-PA', 'CAPSTONE PROJECT â\x80\x93EXECUTION & REPORT WRITING-PR-ESE', 'CAPSTONE PROJECT â\x80\x93EXECUTION & REPORT WRITING-PR-PA']
推荐阅读
- c - 我有一个关于发送信号的问题
- sql-server - 如何更有效地设计我的数据库和实体关系图?
- automationanywhere - 找不到“消息正文”文本框。“搜索条件”不匹配。(TYPE_1001)
- javascript - SetState 在 componentDidMount 中调用后不会重新渲染组件
- r - 根据后续行的内容有条件地替换值
- flash - 如何从网站下载 Flash 视频?
- javascript - 我试图给大写,但它不工作
- javascript - 这段 Swift 代码的 JavaScript 等价物是什么?
- java - 当我使用改造 2 拨打电话时没有收到任何回应
- php - 使用关联键插入不同的日期 - 作为比较,如果键不同(尚未声明),则插入相同的数组