首页 > 解决方案 > 如何检查列表中的字符串

问题描述

我正在尝试把来自两个来源的 content_id 合并到一个列表中,再与另一个列表进行匹配。不知为何(似乎是类型的原因),匹配不起作用。代码附在下面。

import os, csv, re, xlwt

from openpyxl import load_workbook
import glob
import lxml.etree as et


# Excel workbook opened by read_workbook() (passed in from main()).
CATLOG_REPORT_PATH = '/Users/rradhakrishnan/rradhakrishnan/catalogReport.xlsx' 
# Directory that xml_parse() globs for '*.xml' metadata files.
MASTER_XML_PATH = '/Users/rradhakrishnan/rradhakrishnan/FTP/' 
# Directory that watch_folder() globs for 'M*.ts' media files.
MEDIA_PATH = '/Users/rradhakrishnan/rradhakrishnan/media' 
# NOTE(review): not referenced by the code shown here; presumably the
# directory files are meant to be copied into -- confirm before relying on it.
DST_PATH = '/Users/rradhakrishnan/rradhakrishnan/toIngest/' 
# NOTE(review): not referenced by the code shown here; presumably an output
# file listing matched content IDs -- confirm before relying on it.
DESTINATION_PATH = '/Users/rradhakrishnan/rradhakrishnan/matched_content_ids.txt'


def read_workbook(source_path):
    """Collect content IDs from the catalog workbook and pass them on.

    Reads the first column of every row except the header row of the
    'catalogReport_Titles' sheet, converts each cell value to a UTF-8
    encoded string and hands the resulting flat list to watch_folder().

    Args:
        source_path: path of the .xlsx workbook to load.
    """
    wb = load_workbook(source_path, data_only=True)
    ws = wb.get_sheet_by_name('catalogReport_Titles')
    catalog_ids = []

    for i, row in enumerate(ws.rows):
        if i == 0:
            # First row is the header, not data.
            continue
        content_id = unicode(row[0].value)
        # Append the string itself, NOT a one-element list: downstream code
        # tests membership with `some_string in ids`, which can never match
        # an entry of the form ['MZ009828'].
        catalog_ids.append(content_id.encode("utf-8"))
    watch_folder(catalog_ids)

def watch_folder(final_ids):
    """Add the IDs of media files already on disk, then run the XML check.

    Every file matching 'M*.ts' in MEDIA_PATH contributes its file name
    without the extension (UTF-8 encoded) to ``final_ids``; the extended
    list is then passed to xml_parse().

    Args:
        final_ids: list of known content IDs; it is extended in place.
    """
    for filename in glob.glob(os.path.join(MEDIA_PATH, u'M*.ts')):
        # splitext strips only the real trailing extension. The previous
        # pattern '(.+?).ts' had an unescaped dot and no anchor, so a name
        # such as 'Mats.ts' was cut down to 'M'.
        movie_name = os.path.splitext(os.path.basename(filename))[0]
        final_ids.append(movie_name.encode("utf-8"))
    xml_parse(final_ids)

def xml_parse(ids):
    filecount = 0
    asset_metadata = glob.glob(os.path.join(MASTER_XML_PATH, u'*.xml'))
    to_ingest = []
    content_id = []
    for p in asset_metadata:
        filecount += 1
        tree = et.parse(p)
        root = tree.getroot()
        programs = root.xpath('Program[@title="Program"]')
        if len(programs) == 0:
            return None
        program = programs[0]
        # Get the Content ID
        c_id = program.xpath('props/*[@title="Content ID"]')
        if len(c_id) == 0:
            content_id = None
        else:
            content_id = c_id[0].text
            if content_id not in ids:
                print content_id
                print "Copying file no: " , filecount
            else:
                print "file exists", content_id

def main():
    """Entry point: start the ID check from the catalog report workbook."""
    report_path = CATLOG_REPORT_PATH
    read_workbook(report_path)


if __name__ == "__main__":
    main()

有 4 个 content_id 是两边共有的(已经存在),所以我希望它只复制 15 个文件中的 11 个。

输出如下所示。

Copying file no:  10
MZ009828
Copying file no:  11
MZ009827
Copying file no:  12
MN022736
Copying file no:  13
MZ009836
Copying file no:  14
MZ009834
Copying file no:  15

我附上了 MEDIA_PATH 的样子。 在此处输入图像描述

我怎样才能让代码进入打印 "file exists" 的 else 分支?

标签: python, python-2.7

解决方案


在进入循环之前,您需要先创建一个名为 helper_ids 的列表。我用它替换了 content_id 列表,因为在那里初始化 content_id 没有任何意义(况且 content_id 在循环内是一个字符串)。

此外,您需要在每次循环结束时把本次循环得到的 content_id 追加到 helper_ids 列表中,以便后续的循环可以与之比较。

def xml_parse(ids):
    filecount = 0
    asset_metadata = glob.glob(os.path.join(MASTER_XML_PATH, u'*.xml'))
    to_ingest = []
    helper_ids = []
    for p in asset_metadata:
        filecount += 1
        tree = et.parse(p)
        root = tree.getroot()
        programs = root.xpath('Program[@title="Program"]')
        if len(programs) == 0:
            return None
        program = programs[0]
        # Get the Content ID
        c_id = program.xpath('props/*[@title="Content ID"]')
        if len(c_id) == 0:
            content_id = None
        else:
            content_id = c_id[0].text
            if content_id not in helper_ids:
                print content_id
                print "Copying file no: " , filecount
            else:
                print "file exists", content_id
        helper_ids += c_id.text

编辑:重命名了 ids 列表(改为 helper_ids)。


推荐阅读