首页 > 解决方案 > tolist的numpy数组非常慢

问题描述

我正在阅读 NetCDF 文件并将其转换为 GeoJSON。功能已完成,但完成执行大约需要 40 秒。任何优化此代码的方法。这可能是由于三个不同位置的 numpy 数组的三个 for 循环和 tolist 转换。

import time
import geobuf
from netCDF4 import Dataset
from collections import OrderedDict

def main():
    '''
    The main method that holds the logic for conversion
    '''
    dataset = Dataset('outfile.nc', 'r')

    data_dict = OrderedDict({
        'type': 'FeatureCollection',
        'features': []
    })

    for traj in range(dataset.variables['trajectory'].size):
        for t in range(dataset.variables['time'].size):
            lat = dataset.variables['lat'][traj, t].tolist()
            lon = dataset.variables['lon'][traj, t].tolist()

            if lat is None or lon is None:
                continue
            feature_dict = OrderedDict({
                'type': 'Feature',
                'geometry': {
                    'type': 'Point',
                    'coordinates': [lat, lon]
                },
                'properties': {}
            })
            for var in dataset.variables.keys():
                if var not in ['lon', 'lat', 'time', 'trajectory']:
                    dvar = dataset.variables[var]
                    feature_dict['properties'][var] = dvar[traj, t].tolist()
            data_dict['features'].append(feature_dict)
    
    # This encoding is not slow
    pbf = geobuf.encode(data_dict)
    print(pbf)

    dataset.close()


if __name__ == '__main__':
    '''
    Entrypoint to this script
    '''
    start_time = time.time()
    main()
    print("--- %s seconds ---" % (time.time() - start_time))

标签: pythonperformancenumpynetcdfmemory-efficient

解决方案


推荐阅读