首页 > 解决方案 > 如何使用 Pandas 重新创建 Python 代码,而不是创建带有标题和指定行的 CSV 文件

问题描述

我正在使用此代码创建一个 CSV 文件,以在行中提取包含“Data,9,record”的代码行,其中的标题对应于特定数据列中的时间戳、纬度、经度、距离、高度、速度值:

import csv

with open("assets/ride.csv", "r") as source:
    lines = source.readlines()
    
    with open("solution06.csv", "w") as new_file:
        # Write header
        new_file.write(','.join(('timestamp','latitude','longitude','distance','altitude','speed')))
        new_file.write('\n')
        
        # Iterate over all lines after original header
        for line in lines[1:]:
            line = line.split(',')
            
            # Only record lines that start with "Data,<number>,record"
            if line[0] == 'Data'and line[1] == '9' and line[2] == 'record':
                # Join the desired data by commas and write as a new line
                new_file.write(','.join(line[column - 1].strip('"') for column in (5, 8, 11, 14, 17, 20)))
                new_file.write('\n')
    
with open("solution06.csv", "r") as source:
    for line in source.readlines():
        print(line.strip('\n').split(','))

我想在上面做同样的事情,但改用 Pandas。有没有办法将 Python 代码复制到 Pandas 版本的命令中?

来自 CSV 文件的数据片段:

Type,Local Number,Message,Field 1,Value 1,Units 1,Field 2,Value 2,Units 2,Field 3,Value 3,Units 3,Field 4,Value 4,Units 4,Field 5,Value 5,Units 5,Field 6,Value 6,Units 6,Field 7,Value 7,Units 7,Field 8,Value 8,Units 8,Field 9,Value 9,Units 9,Field 10,Value 10,Units 10,Field 11,Value 11,Units 11,Field 12,Value 12,Units 12,Field 13,Value 13,Units 13,Field 14,Value 14,Units 14,Field 15,Value 15,Units 15,Field 16,Value 16,Units 16,Field 17,Value 17,Units 17,Field 18,Value 18,Units 18,Field 19,Value 19,Units 19,Field 20,Value 20,Units 20,Field 21,Value 21,Units 21,Field 22,Value 22,Units 22,Field 23,Value 23,Units 23,Field 24,Value 24,Units 24,Field 25,Value 25,Units 25,Field 26,Value 26,Units 26,Field 27,Value 27,Units 27,Field 28,Value 28,Units 28,Field 29,Value 29,Units 29,Field 30,Value 30,Units 30,Field 31,Value 31,Units 31,Field 32,Value 32,Units 32,Field 33,Value 33,Units 33,Field 34,Value 34,Units 34,Field 35,Value 35,Units 35,Field 36,Value 36,Units 36,Field 37,Value 37,Units 37,Field 38,Value 38,Units 38,Field 39,Value 39,Units 39,Field 40,Value 40,Units 40,Field 41,Value 41,Units 41,Field 42,Value 42,Units 42,Field 43,Value 43,Units 43,Field 44,Value 44,Units 44,Field 45,Value 45,Units 45,Field 46,Value 46,Units 46,Field 47,Value 47,Units 47,Field 48,Value 48,Units 48,Field 49,Value 49,Units 49,Field 50,Value 50,Units 50,Field 51,Value 51,Units 51,Field 52,Value 52,Units 52,Field 53,Value 53,Units 53,Field 54,Value 54,Units 54,Field 55,Value 55,Units 55,Field 56,Value 56,Units 56,Field 57,Value 57,Units 57,Field 58,Value 58,Units 58,Field 59,Value 59,Units 59,Field 60,Value 60,Units 60,Field 61,Value 61,Units 61,Field 62,Value 62,Units 62,Field 63,Value 63,Units 63,Field 64,Value 64,Units 64,Field 65,Value 65,Units 65,Field 66,Value 66,Units 66,Field 67,Value 67,Units 67,Field 68,Value 68,Units 68,Field 69,Value 69,Units 69,Field 70,Value 70,Units 70,Field 71,Value 71,Units 71,Field 72,Value 72,Units 72,Field 73,Value 73,Units 73,Field 74,Value 74,Units 74,Field 75,Value 75,Units 75,Field 76,Value 76,Units 76,Field 77,Value 77,Units 77,Field 78,Value 78,Units 78,Field 79,Value 79,Units 79,Field 80,Value 80,Units 80,Field 81,Value 81,Units 81,Field 82,Value 82,Units 82,
Definition,0,file_id,serial_number,1,,time_created,1,,unknown,1,,manufacturer,1,,product,1,,number,1,,type,1,,
Data,0,file_id,serial_number,"3938543757",,time_created,"896018500",,manufacturer,"1",,garmin_product,"1765",,type,"4",,,,,,,,
Definition,1,file_creator,software_version,1,,hardware_version,1,,,,,,,,,,,,,,,,,
Data,1,file_creator,software_version,"930",,,,,,,,,,,,,,,,,,,,
Definition,2,event,timestamp,1,,data,1,,event,1,,event_type,1,,event_group,1,,,,,,,,
Data,2,event,timestamp,"896018500",s,timer_trigger,"0",,event,"0",,event_type,"0",,event_group,"0",,,,,,,,
Definition,3,device_info,timestamp,1,,serial_number,1,,cum_operating_time,1,,unknown,1,,unknown,1,,unknown,1,,manufacturer,1,,product,1,,software_version,1,,battery_voltage,1,,ant_device_number,1,,device_index,1,,device_type,1,,hardware_version,1,,unknown,1,,battery_status,1,,ant_transmission_type,1,,ant_network,1,,unknown,1,,source_type,1,,
Data,3,device_info,timestamp,"896018500",s,serial_number,"3938543757",,manufacturer,"1",,garmin_product,"1765",,software_version,"9.3",,device_index,"0",,source_type,"5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,3,device_info,timestamp,"896018500",s,manufacturer,"1",,garmin_product,"1765",,software_version,"9.3",,device_index,"1",,device_type,"4",,source_type,"5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,4,unknown,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,4,unknown,unknown,"896018500",,unknown,"1",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,5,unknown,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,5,unknown,unknown,"896018500",,unknown,"895838400",,unknown,"896421600",,unknown,"1",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,6,unknown,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,6,unknown,unknown,"896018500",,unknown,"10319",,unknown,"1000",,unknown,"0",,unknown,"133",,unknown,"51",,unknown,"190",,unknown,"1",,unknown,"40",,unknown,"188",,unknown,"1",,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,7,sport,name,24,,unknown,1,,sport,1,,sub_sport,1,,unknown,1,,unknown,1,,unknown,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,7,sport,name,"Bike",,unknown,"61",,sport,"2",,sub_sport,"0",,unknown,"1",,unknown,"0",,unknown,"0|255|0",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,8,record,timestamp,1,,distance,1,,altitude,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,8,record,timestamp,"896018500",s,distance,"0.0",m,altitude,"286.0",m,unknown,"3930",,unknown,"1002",,enhanced_altitude,"286.0",m,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,3,device_info,timestamp,"896018504",s,manufacturer,"1",,garmin_product,"1620",,software_version,"3.0",,device_index,"2",,device_type,"0",,source_type,"5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,4,unknown,unknown,"896018504",,unknown,"2",,unknown,"2",,unknown,"1",,unknown,"0",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,9,record,timestamp,1,,position_lat,1,,position_long,1,,distance,1,,altitude,1,,speed,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018545",s,position_lat,"504719750",semicircles,position_long,"-998493490",semicircles,distance,"10.87",m,altitude,"285.79999999999995",m,speed,"1.773",m/s,unknown,"3929",,unknown,"1002",,enhanced_altitude,"285.79999999999995",m,enhanced_speed,"1.773",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018560",s,position_lat,"504717676",semicircles,position_long,"-998501870",semicircles,distance,"71.85",m,altitude,"285.0",m,speed,"5.533",m/s,unknown,"3924",,unknown,"1001",,enhanced_altitude,"285.0",m,enhanced_speed,"5.533",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018566",s,position_lat,"504716354",semicircles,position_long,"-998506792",semicircles,distance,"108.02",m,altitude,"284.0",m,speed,"6.485",m/s,unknown,"3919",,unknown,"1001",,enhanced_altitude,"284.0",m,enhanced_speed,"6.485",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018575",s,position_lat,"504714055",semicircles,position_long,"-998515244",semicircles,distance,"170.23",m,altitude,"284.0",m,speed,"6.951",m/s,unknown,"3919",,unknown,"1001",,enhanced_altitude,"284.0",m,enhanced_speed,"6.951",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

我需要的最终结果如下所示: 在此处输入图像描述

标签: pythonpandascsv

解决方案


这是您的解决方案:

import pandas as pd
df = pd.read_csv("your-csv.csv", header=None)

new_df = df[(df[0]=="Data") & (df[1]==9) & (df[2]=="record")]
new_df = new_df[[4,7,10,13,16,19]]
new_df = new_df.rename(columns={
    4: "timestamp", 
    7: "latitude",
    10: "longitude",
    13: "distance",
    16: "altitude",
    19: "speed" 
})

new_df

您应该得到以下输出:

    timestamp   latitude    longitude       distance    altitude    speed
8   896018545   504719750.0 -998493490.0    10.87       285.8       1.773

推荐阅读