python - Python 脚本在搜索时跳过了新添加到子文件夹中的文件
问题描述
大家好,下面的 Python 代码会把每个 doc、docx 和 rtf 文件转换为 .txt,效果很好。当我在主目录(即代码中的 rootdir)中添加一个新文件时,代码能够找到该文件并正确转换。但是,如果我把完全相同的文件添加到 rootdir 的某个子目录中,这个新添加的文件就不会被处理。我的问题是:我应该如何修改代码(或者一般来说应该采用什么不同的做法),才能让添加到任何子目录或主目录(rootdir)中的文件都被找到并转换?
#RTF,DOCX,DOC TO TEXT
import win32com.client
import os
import re
import traceback
from os import listdir
from docx import Document
import shutil
import glob
rootdir = r'C:\Users\aac1928\Desktop\Test'
namedir = 'Search'
# Joined with os.path.join instead of the original '\Search' literal, which
# only worked because '\S' is not a recognised escape sequence.
searchdir = os.path.join(rootdir, namedir)
searchlist = []
dirlist = []

# File extensions that are opened in Word and re-saved as plain text.
_WORD_EXTENSIONS = ('.docx', '.doc', '.rtf')
# FileFormat value handed to Document.SaveAs; 7 is wdFormatUnicodeText in
# Word's WdSaveFormat enumeration.
_TEXT_FILE_FORMAT = 7


def _convert_to_txt(word_app, folder, filename):
    """Save one Word-readable file as ``<stem>.txt`` in the same folder.

    Args:
        word_app: The Word.Application COM object used to open the file.
        folder: Directory that contains ``filename``.
        filename: Bare file name (no directory part).

    Returns:
        The absolute path of the written text file, or ``None`` when the
        file does not have a Word extension or the conversion failed.
    """
    stem, ext = os.path.splitext(filename)
    if ext.lower() not in _WORD_EXTENSIONS:
        return None

    in_file = os.path.abspath(os.path.join(folder, filename))
    # Only the extension is swapped; str.replace("doc", "txt") would also
    # rewrite "doc" occurring elsewhere in the name.
    out_file = os.path.abspath(os.path.join(folder, stem + '.txt'))
    try:
        doc = word_app.Documents.Open(in_file)
        try:
            print(filename, out_file)
            doc.SaveAs(out_file, FileFormat=_TEXT_FILE_FORMAT)
        finally:
            # Always release the document, including for .rtf files.
            doc.Close()
    except Exception:
        # Report and continue: one unreadable file must not abort the walk,
        # otherwise every directory visited after it is silently skipped.
        traceback.print_exc()
        return None
    return out_file


app = win32com.client.Dispatch('Word.Application')
try:
    app.Visible = False
    app.DisplayAlerts = False

    # Creates the search folder for text search in the directory
    if not os.path.exists(searchdir):
        os.mkdir(searchdir)
        print((searchdir + " Has been created"))

    # Converts every Word-type file below rootdir (sub-directories included)
    for root, dirs, files in os.walk(rootdir):
        for file in files:
            _convert_to_txt(app, root, file)

    if os.path.exists(searchdir):
        print('Search file is Present')
        # Base names that already have a text copy in the search folder
        for root, dirs, files in os.walk(searchdir, onerror=None):
            for filename in files:
                searchlist.append(os.path.splitext(filename)[0])

        # Second pass kept from the original flow: report and convert every
        # file whose base name is not yet present in the search folder.
        for root, dirs, files in os.walk(rootdir):
            if namedir in dirs:
                dirs.remove(namedir)  # do not descend into the search folder
            for filename in files:
                if os.path.splitext(filename)[0] not in searchlist:
                    print(filename)
                    _convert_to_txt(app, root, filename)
    else:
        print("")
finally:
    # Quit Word even if something above raised, so no hidden WINWORD
    # process is left running.
    app.Quit()

# Moves the converted txt files to the search folder
for root, dirs, files in os.walk(rootdir):
    # Prune before looking at the files so the search folder itself is never
    # walked, even when the current directory contains no files.
    if namedir in dirs:
        dirs.remove(namedir)
    for file in files:
        if file.endswith('.txt'):
            source_path = os.path.join(root, file)
            try:
                shutil.move(source_path, os.path.join(searchdir, file))
            except (IOError, OSError) as err:
                # Best effort: a locked or unreadable file is reported and
                # skipped instead of stopping the remaining moves.
                print("Could not move", source_path, err)
解决方案
这是解决方案
import os
import shutil
def absoluteFilePaths(directory):
    """Yield the absolute path of every file found beneath *directory*.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included. Directories themselves are not yielded.

    Args:
        directory: Path of the folder to scan.

    Yields:
        One absolute file path per file.
    """
    for dirpath, _, filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))
rootdir = r'C:\Users\aac1928\Desktop\Test'
# Materialise the full listing before renaming anything, so the directory
# walk is not running while the tree is being changed.
file_names = list(absoluteFilePaths(rootdir))
extensions = ['doc', 'docx', 'rtf']
for path in file_names:
    # splitext splits on the last dot of the final path component only, so
    # dots in folder names or extra dots in the file name do not break it.
    file_name, extension = os.path.splitext(path)
    if extension[1:].lower() in extensions and os.path.exists(path):
        new_file_name = file_name + '.txt'
        # NOTE: this only renames the file to a .txt extension; the document
        # contents are not converted to plain text.
        shutil.move(path, new_file_name)
推荐阅读
- reactjs - 如何使用 React 路由器根据路由名称动态渲染组件
- java - 我如何将 JPA 实现到非 Spring java 项目?
- javascript - 引导多个 Modal 组件以在页面加载时弹出
- c++ - 如何将 UDP socket recv 缓冲区大小增加到 Linux 程序的最大值?
- c# - 使用命令行解析参数
- ruby-on-rails - 在 ActiveRecord 中查询包含来自多个数组的一个或多个 id 的对象
- angular - 组件不是已知元素 - 多模块应用程序
- sum - Cypher 将属性设置为不同节点的属性之和
- amazon-web-services - cdk 部署选项以重新构建映像
- python - 为sklearn管道获取“valueError:无法将字符串转换为浮点数:...”