首页 > 解决方案 > 我的 imagewand 在 while 循环的第二次运行时引发异常

问题描述

我正在编写一个脚本,将 PDF 转换为图像以供 OCR 读取,然后重命名 PDF。然后它会在任务执行后删除转换后的图像。由于 OCR 不是 100% 正确,我会使用不同的设置循环它,并将新文件名与主列表进行比较。问题是它在第一次运行时运行良好,但无法循环。我提出了这个异常

我很困惑,因为正确的 PDF 文件名已被重新定位并且图像已被删除,因此不会发生重复,但出现的错误是要求已重新定位到另一个文件夹的文件。

def find_ext(dr, ext):
    return glob(path.join(dr,"*.{}".format(ext)))

allpdf = find_ext(".","pdf")



def TwoLine():
   for PDF_file in allpdf:
      PDFfile = wi(filename=PDF_file[2:],resolution=200)
      Images = PDFfile.convert('jpg')
      width,height = Images.size
      Images.crop(0, 0, width, height//3) 
      Images.scale(scaledef,1000)
      Images.sharpen()
        # Images.enhance()
        # Images.auto_orient()
        # Images.auto_level()
      ImageSequence = 1
      imagefile = PDF_file[2:-4]+str(ImageSequence)+".jpg"
      Images.save(filename=imagefile)
      for img in PDFfile.sequence:
         Image = wi(image = img)
         ImageSequence += 1
         pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
         photo=cv2.imread(imagefile)
         text = pytesseract.image_to_string(photo)
         text_file=open("Notepad.txt","w+")
         text_file.write(text)
         text_file.close()
         text_file= open("Notepad.txt","r+")
         notes=text_file.readlines()
         counter=0
         name=""
         for line in notes:
            line=line.replace("["," ")
            line=line.replace("]"," ")
            numbers=sum(c.isdigit() for c in line)
            letters=sum(c.isalpha() for c in line)
            spaces=sum(c.isspace() for c in line)
            line=line.strip("\n")
            line=line.strip("\t")
            line=line.replace("|"," ")
            others=len(line)-numbers-letters-spaces 
            if numbers>=5 and others<2 and counter<2:
                line=line.replace("SN"," ")
                cond_pass_fail = 1
                if counter ==0:
                    if line[0]=="_":
                        line=line.replace("_","",1)
                    line=line.replace('=','')
                    name+=line
                    if len(name)>=12:
                        os.rename(PDF_file, name +".pdf")
                        break
                    counter+=1
                elif counter ==1:
                    name+="_"
                    name+=line
                    if "__" in name:
                        name=name.replace("_","",1)
                    counter+=1

            elif counter>=2:
                try:
                    os.rename(PDF_file, name +".pdf")
                    counter=0
                    break
                except FileExistsError:
                    continue

        else:
            fail_lst.append(PDF_file)
            print("Cant find name in " + PDF_file)
        
    os.remove(imagefile)
def PNFilter():
   FileList=[]
   for files in allpdf:
      PN=files.split('_')[0]
      PN=PN.replace('.\\',"")
      masterfile=open("masterfile.txt","r")
      master=masterfile.readlines()
      for line in master:
        line=line.strip("\n")
        line=line.strip("\t")
        if PN not in line:
            continue
        elif PN in line:
            FileList.append(files)
            break
   path=r'C:\\Users\\khairilamir.binahmad\\Desktop\\PDFRENAME\\test\\'
   for files in allpdf:
      if files in FileList:
        source=path+files
        dest=path+"PNFilter"
        shutil.move(source,dest)

while bool(allpdf)==True and scaledef>=2500:
    scaledef-=100
    TwoLine()
    PNFilter()

终端中出现的错误:

File "c:\Users\khairilamir.binahmad\Desktop\PDFRENAME\test\AllLine.py", line 175, in <module>
    TwoLine()
  File "c:\Users\khairilamir.binahmad\Desktop\PDFRENAME\test\AllLine.py", line 47, in TwoLine
    PDFfile = wi(filename=PDF_file[2:],resolution=200)
  File "C:\Users\khairilamir.binahmad\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wand\image.py", line 9144, in __init__
    self.read(filename=filename)
  File "C:\Users\khairilamir.binahmad\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wand\image.py", line 9815, in read
    self.raise_exception()
  File "C:\Users\khairilamir.binahmad\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wand\resource.py", line 222, in raise_exception
    raise e
wand.exceptions.BlobError: unable to open image 'A51357_2135721090100.pdf': No such file or directory @ error/blob.c/OpenBlob/3536

标签: pythonimagepdfocrwand

解决方案


推荐阅读