首页 > 解决方案 > 仅使用 numpy 和 itertools 加载文本文件段

问题描述

我有一段代码,最初使用一个基于循环二进制密钥(cipher cycle)的解码器将 .dat 文件解码为 .txt 文件。它会生成一个包含 500 多行数据点的文本文件,其中第 0-65 行是标题和其他显示相关的内容,最后几行(从第 586 行开始)是解码错误的文本,如下所示:

ßÅBÎheÀœaÜ;sî3TÐêM·Zì?pêI†Q'&×¥ü#ÇPËiPì¿j–hñHžíoî#^[ÿ>BÿÃ@ÌhcP¿_ÔkõOˆEñlÀ‹J–>tò5Ægã_ð: yŽ6aôÎ “uôhaù*°DýZ4}Ó´Qá êlHí–'/mÑ=žt k×£QÉoû·]Ý&õC´Jœ9mû»ZÃ+]þ6ƒ[ቶS;Uö¥Wã Lè:ÂXÿ4sÈÄAïPó€Dó$EØÙ•dДeïkHâN xÐj@Ø"”eë1aõÅCÒ7ùC–ñiÐCÑP‹ Æ Ñ ]ô†}ÌdDñ Ë,WÎÄdó^ã8žDäÓ)Çq9}ùÃfÄP÷ÇzîoiÒ ÁpìeSÖ€ÒMŒÀ“;Bö

我正在使用代码:

# Load only the data rows: islice(f, 68, 584) yields the lines at 0-based
# positions 68 through 583 and passes just those to np.loadtxt.
# NOTE: the file is opened in text mode without an explicit encoding or
# error handler, so decoding is strict and uses the platform default codec.
with open (file) as f:  
    xpoints, ypoints, gradient = np.loadtxt(itertools.islice(f,68, 584), delimiter=',', unpack=True)

这样做是为了只加载包含我所需数据点的那些行。

然而,由于某种原因,这仍然会导致程序抛出错误:某个字节映射到未定义的字符,因而无法解码。我已经确认这是由文件底部的垃圾文本引起的,而且错误似乎正是在上面显示的那一行抛出的,但我想不通为什么会这样,因为它根本不需要读取那些行。

完整错误:

  File "C:\Users\brady\Desktop\Slider_All\Slide-Mobile.py", line 57, in <module>
    xpoints, ypoints, gradient = np.loadtxt(IT.islice(f,68, 500), delimiter=',', unpack=True)
  File "C:\Users\brady\AppData\Local\Programs\Python\Python38-32\lib\site-packages\numpy\lib\npyio.py", line 1159, in loadtxt
    for x in read_data(_loadtxt_chunksize):
  File "C:\Users\brady\AppData\Local\Programs\Python\Python38-32\lib\site-packages\numpy\lib\npyio.py", line 1075, in read_data
    for i, line in enumerate(line_iter):
  File "C:\Users\brady\AppData\Local\Programs\Python\Python38-32\lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 7758: character maps to <undefined>

itertools.islice 或 numpy.loadtxt 是否有可能在取切片之前先尝试读取整个文件,从而遇到这个问题?还是说这完全是我忽略的其他原因?为了完整起见,我将在下面贴出我全部未经编辑的代码,感谢任何帮助。

import matplotlib.animation as animation
from matplotlib.widgets import Slider, Button
import matplotlib as mpl
from matplotlib import pyplot as plt
import scipy.interpolate as inter
import numpy as np
import itertools as IT
from itertools import cycle
from scipy.interpolate import interp1d
import os


# Input data file.
file = 'example.dat'

# Lower and upper x bounds of the region to edit.  A larger range leads to a
# lot of lag; keeping the span close to 50 is recommended.
x1, x2 = 250, 300

# ---------------------------------------------------------------------------
# start: file lines that come before the selected points
# end:   file lines that come after the selected points
# files: text files to be removed when the program is done
start, end, files = [], [], []

class Decoder:
    """Convert data files between their obfuscated ``.dat`` form and ``.txt``.

    The first ``HEADER_SIZE`` bytes of a file are copied through unchanged and
    every following byte is XORed with a repeating 7-byte key.  XOR is its own
    inverse, so the same ``decode`` routine is used in both directions.
    """

    # Number of leading bytes that are copied verbatim.
    HEADER_SIZE = 14

    # Repeating XOR key applied to every byte after the header.
    KEY = (0b01011100, 0b00100111, 0b10111010, 0b01111011,
           0b11110010, 0b00110010, 0b10100101)

    @staticmethod
    def decode(fn_in, fn_out):
        """XOR-transform the file *fn_in* and write the result to *fn_out*.

        Args:
            fn_in: Path of the file to read (opened in binary mode).
            fn_out: Path of the file to create or overwrite.
        """
        with open(fn_in, 'rb') as fin, open(fn_out, 'wb') as fout:
            fout.write(fin.read(Decoder.HEADER_SIZE))
            # Transform the remainder in one pass instead of byte-by-byte I/O.
            payload = fin.read()
            fout.write(bytes(b ^ k for b, k in zip(payload, cycle(Decoder.KEY))))

    @staticmethod
    def to_txt(filename):
        """Decode a ``dat`` file into a sibling ``txt`` file.

        Args:
            filename: Path whose last three characters are expected to be
                ``dat``.

        Returns:
            The path of the text file that was written, or *filename*
            unchanged when the extension is not recognised (a message is
            printed and nothing is written).
        """
        if not filename.endswith("dat"):
            print("Extension not recognised for input filename \""+str(filename)+"\", skipping...")
            return filename

        target = filename[:-3] + "txt"
        Decoder.decode(filename, target)
        return target

    @staticmethod
    def to_dat(filename):
        """Encode a ``txt`` file back into a sibling ``dat`` file.

        *filename* is always appended to the module-level ``files`` list
        (the list of text files to remove when the program is done), even
        when the extension is not recognised.

        Args:
            filename: Path whose last three characters are expected to be
                ``txt``.
        """
        files.append(filename)
        if filename.endswith("txt"):
            # The output path is derived from the input path; the previous
            # code referenced an undefined name here and raised NameError.
            Decoder.decode(filename, filename[:-3] + "dat")
        else:
            print("Extension not recognised for input filename \""+str(filename)+"\", skipping...")

# Decode the input to text first when it is still in .dat form, and register
# the generated text file so it is removed when the program is done.
if file[-3:] == "dat":
    file = Decoder.to_txt(file) #Converts .dat to .txt
    files.append(file)

# The data points occupy the lines at 0-based positions 68..583.
FIRST_DATA_LINE = 68
END_DATA_LINE = 584     # exclusive
DATA_LINE_COUNT = END_DATA_LINE - FIRST_DATA_LINE

# Read the header and data lines once.  Text-mode reads decode the file in
# buffered chunks, so a strict decoder can run into the undecodable trailer
# that follows the data even though only the first 584 lines are consumed;
# errors='ignore' drops those bytes instead of raising UnicodeDecodeError.
with open(file, errors='ignore') as f:
    _lines = list(IT.islice(f, END_DATA_LINE))

#Gets all data points from file
xpoints, ypoints, gradient = np.loadtxt(
    _lines[FIRST_DATA_LINE:END_DATA_LINE], delimiter=',', unpack=True)

#get a list of points to fit a spline to as well
xmin = min(xpoints)
xmax = max(xpoints)

#Calculates which lines of data are required to plot
X1 = int(DATA_LINE_COUNT*((x1 - xmin)/(xmax-xmin))) + FIRST_DATA_LINE
X2 = int(DATA_LINE_COUNT*((x2 - xmin)/(xmax-xmin))) + FIRST_DATA_LINE

# Keep both indices inside the data rows so that bounds outside the measured
# x range cannot select header lines or lines past the data.
X1 = min(max(X1, FIRST_DATA_LINE), END_DATA_LINE)
X2 = min(max(X2, FIRST_DATA_LINE), END_DATA_LINE)

#Gets specific lines and saves the rest to copy back later
xp, ypoints, gradient = np.loadtxt(_lines[X1:X2], delimiter=',', unpack=True)
start.extend(_lines[:X1])
end.extend(_lines[X2:END_DATA_LINE])

# Number of points in the selected range.
N = len(xp)

# Sampling stride: every j-th point is used as a control point.  A lower
# stride gives a more accurate plot but runs slower.  All three size bands
# currently use a stride of 1, i.e. every point is kept.
if N < 200:
    j = 1
elif N < 400:
    j = 1
else:
    j = 1

x = xp[::j]
yvals = ypoints[::j]
N = len(x)
xnew = xp

# Spline fit through the control points.
spline = inter.InterpolatedUnivariateSpline(x, yvals)

# Set up the plot.  (The original line read "fig,axes.set_position(...)",
# which only built and discarded a tuple around the same call.)
fig, axes = plt.subplots(1, 1, figsize=(12.0, 4.0), sharex=True)
axes.set_position([0.05, 0.08, 0.93, 0.80])
ax1 = axes

pind = None  # active point
epsilon = 5  # max pixel distance
def update(val):
    """Refresh the control points and spline from the slider values.

    Copies the current value of each of the N sliders into ``yvals``, refits
    the spline through ``(x, yvals)`` and updates both plotted lines.

    Args:
        val: Value passed by the slider callback; not used.
    """
    global yvals, spline

    for idx in range(N):
        yvals[idx] = sliders[idx].val
    l.set_ydata(yvals)

    spline = inter.InterpolatedUnivariateSpline(x, yvals)
    m.set_ydata(spline(X))

    # Redraw the canvas while idle.
    fig.canvas.draw_idle()

def reset(event):
    """Reset the plot to the values loaded when the file was opened.

    Rebinds ``yvals`` to ``ypoints``, resets each of the N sliders, refits
    the spline and updates both plotted lines.

    Args:
        event: Button click event; not used.
    """
    global yvals, spline

    yvals = ypoints
    for idx in range(N):
        sliders[idx].reset()

    spline = inter.InterpolatedUnivariateSpline(x, yvals)
    l.set_ydata(yvals)
    m.set_ydata(spline(X))

    # Redraw the canvas while idle.
    fig.canvas.draw_idle()

def save(event):
    """Overwrite the current file with the edited curve.

    Evaluates a cubic interpolation of ``(x, yvals)`` at ``xnew``, plots it,
    then rewrites ``file`` as: the saved leading lines, the new
    ``x, y, gradient`` rows (comma separated), and the saved trailing lines.
    Finally the text file is converted back to ``.dat``.

    Args:
        event: Button click event; not used.
    """
    cubic = interp1d(x, yvals, kind='cubic')
    ynew = cubic(xnew)
    ax1.plot(xnew, ynew)

    # One row per point: x, y, gradient.
    table = np.column_stack((xnew, ynew, gradient))

    with open(file, 'w') as out:
        out.writelines(start)
        np.savetxt(out, table, delimiter = ',')
        out.writelines(end)

    Decoder.to_dat(file) #Converts .txt to .dat

def button_press_callback(event):
    """Handle a mouse press: record which point, if any, was clicked.

    Presses outside the axes, or with a button other than button 1, are
    ignored.  Otherwise ``pind`` is set to the result of
    ``get_ind_under_point``.
    """
    global pind
    if event.inaxes is None or event.button != 1:
        return
    pind = get_ind_under_point(event)

def button_release_callback(event):
    """Handle a mouse release: clear the active point.

    Only a release of button 1 clears ``pind``; other buttons are ignored.
    """
    global pind
    if event.button == 1:
        pind = None

def get_ind_under_point(event):
    """Return the index of the control point under the cursor, or None.

    The control points ``(x, yvals)`` are transformed from data coordinates
    to display coordinates with ``ax1.transData`` and compared against the
    event position ``(event.x, event.y)``.

    Args:
        event: Mouse event providing ``x`` and ``y`` display coordinates.

    Returns:
        The index of the nearest control point (the first one on ties) when
        it lies closer than ``epsilon``; otherwise ``None``.
    """
    # Control points as an (n, 2) array of data coordinates.
    points = np.column_stack((x, yvals))

    # Measure the distance in display space so the tolerance is independent
    # of the axis scales.
    shown = ax1.transData.transform(points)
    dist = np.hypot(shown[:, 0] - event.x, shown[:, 1] - event.y)

    # argmin returns the first index of the minimum distance.
    ind = np.argmin(dist)
    if dist[ind] >= epsilon:
        return None
    return ind

def motion_notify_callback(event):
    """Handle mouse movement: drag the active point vertically.

    Nothing happens unless a point is active (``pind`` is not None), the
    cursor is inside the axes, and button 1 is held.  Otherwise the active
    point's y value is set to the cursor's y data coordinate, the matching
    slider is updated with that value, and a redraw is requested.
    """
    global yvals
    if pind is None or event.inaxes is None or event.button != 1:
        return

    # Move the active point to the cursor height.
    yvals[pind] = event.ydata

    # Update the curve via the slider and draw.
    sliders[pind].set_val(yvals[pind])
    fig.canvas.draw_idle()



# x positions at which the spline is drawn.
X = xp

# Three lines: the data as loaded (dashed), the draggable control points
# (markers only) and the fitted spline.
ax1.plot(X, ypoints, 'k--', label='original')
l, = ax1.plot(x, yvals, color='k', linestyle='none', marker='o', markersize=8)
m, = ax1.plot(X, spline(X), 'r-', label='spline')

# Vertical margins derived from the largest y value.
y_top = max(ypoints)
y_bottom = min(ypoints)
if y_top > 0:
    yheight = 0.01*y_top
    ylower = 0
else:
    yheight = -0.1*y_top
    ylower = yheight

ax1.set_yscale('linear')
ax1.set_xlim(x1, x2)
ax1.set_ylim(y_bottom - ylower, y_top + yheight)
ax1.grid(True)
ax1.yaxis.grid(True, which='minor', linestyle='--')


# One slider per control point, each wired to update().
sliders = []
for i in range(N):
    axamp = plt.axes([0.84, -1, 0.12, 0.01])
    s = Slider(axamp, 'p{0}'.format(i), -100, 10, valinit=yvals[i])
    sliders.append(s)

for s in sliders:
    s.on_changed(update)

# Reset and Save buttons.
axres = plt.axes([0.84, 0.90, 0.15, 0.08])
bres = Button(axres, 'Reset')
bres.on_clicked(reset)

axsave = plt.axes([0.68, 0.90, 0.15, 0.08])
bsave = Button(axsave, 'Save')
bsave.on_clicked(save)


# Mouse event handlers for picking and dragging points.
for event_name, handler in (
    ('button_press_event', button_press_callback),
    ('button_release_event', button_release_callback),
    ('motion_notify_event', motion_notify_callback),
):
    fig.canvas.mpl_connect(event_name, handler)

plt.show()

# Remove the text files registered in `files`.  The same path can be
# registered more than once (when the input is decoded and again on every
# save), so de-duplicate while keeping order, and tolerate a file that is
# already gone rather than aborting the rest of the cleanup.
for filename in dict.fromkeys(files):
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

编辑:我知道这个错误几乎肯定与 itertools.islice 有关,因为我在这里发现了一个类似的问题:《Python 3 itertools.islice continue despite UnicodeDecodeError》(Python 3 中 itertools.islice 在出现 UnicodeDecodeError 后仍继续执行)。
目前正在研究打开文件的其他可行方法,因为现阶段无法更改 .dat 文件的解码方式。

标签: python-3.x, numpy

解决方案


我已经使用此处发布的解决方案解决了这个问题:https://stackoverflow.com/a/31113251/10475989

我的最终代码是:

# errors='ignore' makes the decoder drop bytes it cannot decode instead of
# raising UnicodeDecodeError, so a single pass is enough: there is no failure
# to fall back from, and trying several encodings in turn would only parse
# the file repeatedly and keep the last result.
with open(file, 'r', encoding='cp1252', errors='ignore') as f:
    xpoints, ypoints, gradient = np.loadtxt(IT.islice(f, 65, 582), delimiter=',', unpack=True)

推荐阅读