python - 我的 imagewand 在 while 循环的第二次运行时引发异常
问题描述
我正在编写一个脚本,将 PDF 转换为图像以供 OCR 读取,然后重命名 PDF。然后它会在任务执行后删除转换后的图像。由于 OCR 不是 100% 正确,我会使用不同的设置循环它,并将新文件名与主列表进行比较。问题是它在第一次运行时运行良好,但无法循环。我提出了这个异常
我很困惑,因为正确的 PDF 文件名已被重新定位并且图像已被删除,因此不会发生重复,但出现的错误是要求已重新定位到另一个文件夹的文件。
def find_ext(dr, ext):
return glob(path.join(dr,"*.{}".format(ext)))
allpdf = find_ext(".","pdf")
def TwoLine():
for PDF_file in allpdf:
PDFfile = wi(filename=PDF_file[2:],resolution=200)
Images = PDFfile.convert('jpg')
width,height = Images.size
Images.crop(0, 0, width, height//3)
Images.scale(scaledef,1000)
Images.sharpen()
# Images.enhance()
# Images.auto_orient()
# Images.auto_level()
ImageSequence = 1
imagefile = PDF_file[2:-4]+str(ImageSequence)+".jpg"
Images.save(filename=imagefile)
for img in PDFfile.sequence:
Image = wi(image = img)
ImageSequence += 1
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
photo=cv2.imread(imagefile)
text = pytesseract.image_to_string(photo)
text_file=open("Notepad.txt","w+")
text_file.write(text)
text_file.close()
text_file= open("Notepad.txt","r+")
notes=text_file.readlines()
counter=0
name=""
for line in notes:
line=line.replace("["," ")
line=line.replace("]"," ")
numbers=sum(c.isdigit() for c in line)
letters=sum(c.isalpha() for c in line)
spaces=sum(c.isspace() for c in line)
line=line.strip("\n")
line=line.strip("\t")
line=line.replace("|"," ")
others=len(line)-numbers-letters-spaces
if numbers>=5 and others<2 and counter<2:
line=line.replace("SN"," ")
cond_pass_fail = 1
if counter ==0:
if line[0]=="_":
line=line.replace("_","",1)
line=line.replace('=','')
name+=line
if len(name)>=12:
os.rename(PDF_file, name +".pdf")
break
counter+=1
elif counter ==1:
name+="_"
name+=line
if "__" in name:
name=name.replace("_","",1)
counter+=1
elif counter>=2:
try:
os.rename(PDF_file, name +".pdf")
counter=0
break
except FileExistsError:
continue
else:
fail_lst.append(PDF_file)
print("Cant find name in " + PDF_file)
os.remove(imagefile)
def PNFilter():
FileList=[]
for files in allpdf:
PN=files.split('_')[0]
PN=PN.replace('.\\',"")
masterfile=open("masterfile.txt","r")
master=masterfile.readlines()
for line in master:
line=line.strip("\n")
line=line.strip("\t")
if PN not in line:
continue
elif PN in line:
FileList.append(files)
break
path=r'C:\\Users\\khairilamir.binahmad\\Desktop\\PDFRENAME\\test\\'
for files in allpdf:
if files in FileList:
source=path+files
dest=path+"PNFilter"
shutil.move(source,dest)
while bool(allpdf)==True and scaledef>=2500:
scaledef-=100
TwoLine()
PNFilter()
终端中出现的错误:
File "c:\Users\khairilamir.binahmad\Desktop\PDFRENAME\test\AllLine.py", line 175, in <module>
TwoLine()
File "c:\Users\khairilamir.binahmad\Desktop\PDFRENAME\test\AllLine.py", line 47, in TwoLine
PDFfile = wi(filename=PDF_file[2:],resolution=200)
File "C:\Users\khairilamir.binahmad\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wand\image.py", line 9144, in __init__
self.read(filename=filename)
File "C:\Users\khairilamir.binahmad\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wand\image.py", line 9815, in read
self.raise_exception()
File "C:\Users\khairilamir.binahmad\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wand\resource.py", line 222, in raise_exception
raise e
wand.exceptions.BlobError: unable to open image 'A51357_2135721090100.pdf': No such file or directory @ error/blob.c/OpenBlob/3536
解决方案
推荐阅读
- apache-nifi - 如何用 NULL 替换从 ExecuteSQL 中提取的 NIFI 属性值
- python - 无法在 tkinter 中对齐三个文本小部件
- firebase - Firebase Web 性能分布图上的百分比值
- swift - Google Maps API didDrag、didBeginDragging、didEndDragging 函数不起作用
- sql - SQL获取由一列不同的行,但在按第三列排序的另一列上也不同
- python-3.x - 创建具有多个 x 轴的连续子图
- python - Python非常规排序算法
- javascript - 如果在外面点击,如何自动关闭手风琴
- python - 如何在beautifulsoup中检查已经抓取的URL
- python - Is it possible to run Google Cloud Platform NLP-API entity sentiment analysis in a batch processing mode for a large number of documents?