首页 > 解决方案 > 我应该如何确保所有唯一项目都存在于唯一项目 python 列表中?

问题描述

因此,我正在使用 python dhash 库对唯一图像和相似图像进行排序,并且在比较图像期间,我将每个图像与当前图像两侧的 10 个邻居进行比较,但我的脚本从唯一列表中删除了所有相似图像,而不保留一个图像类似于唯一列表的列表,我该如何解决这个问题是我迄今为止编写的代码:

def compare_image(curr_img, curr_img_hash, to_compare):
    """
    Function for comparing two images

    """
    global norm_cache
    if to_compare in norm_cache:
        print(f"cached val found for {to_compare}")
        gray2 = norm_cache[to_compare]
        h2r,h2c =  dhash.dhash_row_col(gray2)
        hash2 = dhash.format_hex(h2r,h2c)
    else:
        print("No cached_val found, Computing and storing in norm_cache")
        gray2 = _get_image(to_compare)
        h2r,h2c =  dhash.dhash_row_col(gray2)
        hash2 = dhash.format_hex(h2r,h2c)
        norm_cache[to_compare] = gray2  # Update cache...

    print(f"Values ----> {curr_img} : {curr_img_hash}, {to_compare} : {hash2}")
    if distance.hamming(curr_img_hash,hash2) <= threshold:
        print("images are same")
        return "similar"
    else:
        print("images are different")
        return "different"


def find_duplicates(folder) -> tuple:
    """
    Main function to find duplicates.
    """
    uniques: list = list()
    similar: list = list()
    buffer:  list = list()
    
    
    image_list = os.listdir(folder)
    for image_index in range(0,len(image_list)):
        curr_img = image_list[image_index]
        full_path = os.path.join(folder, curr_img)
        gray1 = _get_image(full_path)
        h1r,h1c =  dhash.dhash_row_col(gray1)
        hash1 = dhash.format_hex(h1r,h1c)
        # compare left 10s
        x = image_index - 10
        if x < 0:
            x = 0
        if x < image_index:
            for prev_image_index in range(x, image_index):
                if os.path.isfile(full_path) and os.path.splitext(full_path)[-1] in image_exts:
                    prev_image = image_list[prev_image_index]
                    prev_full_path = os.path.join(folder,prev_image)
                    result = compare_image(curr_img, hash1,prev_full_path)
                    if result == 'similar':
                        if prev_full_path not in similar:
                            if prev_full_path not in buffer:
                                buffer.append(prev_full_path)
                            similar.append(prev_full_path)
                    if result == 'different':
                        if prev_full_path not in uniques:
                            uniques.append(prev_full_path) 
        #compare right 10s
        x = image_index + 10 + 1
        if x > len(image_list):
            x = len(image_list)
        if x > image_index:
            for j in range(image_index+1,x):
                if os.path.isfile(full_path) and os.path.splitext(full_path)[-1] in image_exts:
                    ahead = image_list[j]
                    ahead_full_path = os.path.join(folder,ahead)
                    result = compare_image(curr_img, hash1,ahead_full_path)
                    if result == 'similar':
                        if ahead_full_path not in similar:
                            similar.append(ahead_full_path)
                    if result == 'different':
                        if ahead_full_path not in uniques:
                            uniques.append(ahead_full_path) 

    for i in similar:
        if i in uniques and i in buffer:
            uniques.remove(i)

    print(f"{len(similar)} Similar Found...")
    print(f"{len(uniques)} Unique Found...")

    return similar, uniques

标签: pythonimage

解决方案


您可以将所有元素传递给 python 集合,它会自动只保留唯一值。之后,您可以使用 sorted 对列表进行排序并将其转换回列表

mySet = {'a', 'd', 'c', 'b', 'b'}
myList = list(sorted(mySet))

可能不是最有效的解决方案,但它确实有效


推荐阅读