python - 在文件夹中搜索文件名
问题描述
我有一个文件夹内的 ts 文件列表。我尝试从 XML 中提取内容 id,这是没有扩展名的文件名。我需要搜索与内容 id 匹配的 ts 文件。由于某种原因,它失败了。我附上下面的代码。我还附上了 ts 文件的屏幕截图。
import glob
import lxml.etree as et
import os, csv
# Directory that main() globs for *.xml files to parse.
ASSET_METADATA_PATH = '/Users/roradhak/eVision/failed_assets/'
# Directory that main() globs for *.ts files.
TS_PATH = '/Users/roradhak/eVision/ts_check/'
def parse_file(path):
    """Read the content id, name and trailer info from one XML file.

    The first ``Program`` element with ``title="Program"`` directly under
    the document root is inspected; its ``props`` children are looked up by
    their ``title`` attribute.

    Args:
        path: Path of the XML file to parse (anything ``et.parse`` accepts).

    Returns:
        A 4-tuple ``(content_id, content_name, has_trailer, trailer_id)``.
        Each of the first three is the text of the matching property, or
        ``None`` when the property (or the whole Program element) is
        missing. ``trailer_id`` is ``content_id`` with its first ``'M'``
        replaced by ``'T'`` when ``has_trailer`` is ``"Y"``, otherwise ``""``.
    """
    tree = et.parse(path)
    root = tree.getroot()
    programs = root.xpath('Program[@title="Program"]')
    if not programs:
        # Always return four values so callers can unpack unconditionally.
        return None, None, None, ""
    # TODO - Are multiple programs expected? If so, the function should
    # return a list of tuples
    program = programs[0]

    def prop_text(title):
        # Text of the first props child carrying this title, or None.
        nodes = program.xpath('props/*[@title="{0}"]'.format(title))
        return nodes[0].text if nodes else None

    content_id = prop_text("Content ID")
    has_trailer = prop_text("Has_Trailer")
    content_name = prop_text("Name")

    trailer_id = ""
    # A trailer id can only be derived when a content id is present.
    if has_trailer == "Y" and content_id:
        trailer_id = content_id.replace('M', 'T', 1)
    return content_id, content_name, has_trailer, trailer_id
def main():
    """Report which parsed assets have no matching .ts file in TS_PATH.

    Every ``*.xml`` file under ``ASSET_METADATA_PATH`` is parsed with
    ``parse_file``. ``No Movie`` is printed when ``<content_id>.ts`` is not
    among the ``*.ts`` files in ``TS_PATH``; ``No trailer`` is printed when
    the asset has a trailer (``has_trailer == "Y"``) and
    ``<trailer_id>.ts`` is not among them.
    """
    asset_metadata = glob.glob(os.path.join(ASSET_METADATA_PATH, u'*.xml'))
    # glob returns paths that include the directory, so reduce them to bare
    # file names; a set makes each membership test O(1).
    movies = set(
        os.path.basename(ts_path)
        for ts_path in glob.glob(os.path.join(TS_PATH, u'*.ts'))
    )
    for p in asset_metadata:
        print(u'Processing: {p}'.format(p=p).encode('utf-8'))
        # Parse first, then print: the values do not exist before this call.
        content_id, content_name, has_trailer, trailer_id = parse_file(p)
        print(u'{0} {1} {2} {3}'.format(
            content_id, content_name, has_trailer, trailer_id))
        # Compare against the file names found on disk, not the TS_PATH
        # string itself (which would be a substring test on the directory).
        if u'{c}.ts'.format(c=content_id) not in movies:
            print("No Movie")
        if has_trailer == "Y" and u'{c}.ts'.format(c=trailer_id) not in movies:
            print("No trailer")


if __name__ == '__main__':
    main()
输出如下
/Users/roradhak/IVPGET_Local/venv/bin/python /Users/roradhak/Downloads/validate_xml.py
Processing: /Users/roradhak/eVision/failed_assets/E30000001557115265_2019_08_29T11_20_08Z.xml
MD009232 Ep 143 - Cool look Hair style N
No Movie
Processing: /Users/roradhak/eVision/failed_assets/10000000717960000_2019_10_09T15_04_20Z.xml
MZ008931 Aan: Men At Work Y TZ008931
No Movie
No trailer
Processing: /Users/roradhak/eVision/failed_assets/E30000001557537308_2019_08_09T19_15_22Z.xml
MZ010564 EP29 - Episode 29 - Raheem S1 Y TZ010564
No Movie
No trailer
Process finished with exit code 0
解决方案
这是我使用 pathlib(Python 3.4+)的方法;注意代码中的 f-string 需要 Python 3.6+:
from pathlib import Path
# Folder that main() scans for *.xml files.
failed_assets_folder = Path('/Users/roradhak/eVision/failed_assets')
# Folder in which main() checks for <content_id>.ts and <trailer_id>.ts.
ts_folder = Path('/Users/roradhak/eVision/ts_check')
def main():
    """Report which parsed assets have no matching .ts file in ts_folder.

    Each ``*.xml`` file in ``failed_assets_folder`` is parsed with
    ``parse_file``. ``No Movie`` is printed when ``<content_id>.ts`` does
    not exist in ``ts_folder``; ``No trailer`` is printed when
    ``has_trailer`` is ``'Y'`` and ``<trailer_id>.ts`` does not exist there.
    """
    for failed_asset in failed_assets_folder.glob('*.xml'):
        print(f'Processing: {failed_asset.name}')
        # Pass the full path: the bare file name would only resolve if the
        # current working directory happened to be the assets folder.
        content_id, content_name, has_trailer, trailer_id = parse_file(
            str(failed_asset))
        print(f'{content_id}, {content_name}, {has_trailer}, {trailer_id}')
        # ts_folder / name is already a Path; no extra Path() wrapper needed.
        if not (ts_folder / f'{content_id}.ts').exists():
            print('No Movie')
        if has_trailer == 'Y' and not (ts_folder / f'{trailer_id}.ts').exists():
            print('No trailer')
它只是实现了文件搜索部分,虽然它没有经过测试。
推荐阅读
- c# - 带有 System.Threading.Thread 的 WPF 启动画面
- mongodb - Hadoop with MongoDB storage
- php - Problem with json_decode - Syntax error, malformed JSON
- android - ExpandableRecyclerView 与 GridLayoutManager ,垂直扩展父视图
- graphdb - 自定义 sparql 过滤器 Graphdb
- java - NullPointerException 未被捕获
- webpack - 将 PureScript `Pulp repl` 与无 js 文件一起使用
- python - 在 Catplot 中叠加图像
- apache-kafka - 减少 max.spout.pending 值会导致 Storm UI 中 Kafka Spout 中的消息失败?
- android - How to change center alignment in EditText when the password Toggle is Enabled?