首页 > 解决方案 > 如何根据字符串列表创建解密字典以按字符串加密

问题描述

我有一些输入 inputs = ["and", "dick", "jane", "puff", "spot", "yertle"] ,而不是由输入中的单词组成的句子,encrypted_sentence ="bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn" 但是字母以某种模式发生了变化。例如“a”='x'; “n”='s'; "d"='b'.....字母表中的每个字母都被替换为字母表中的一些不同字母。我需要创建新的“字母”key和旧字母的字典value。像这样decrypt_dict={ "x":'a', "s":'n', "b":'d', "q":'a', "t":'t'}等等......如果输入中唯一字母的数量与句子中唯一字母的数量不同,print "ERROR"因为不可能从输入中创建句子 如果不使用某些字母,没关系,跳过它们。没有必要用字母表中的每个字符创建字典,

到目前为止,这是我的代码(我使用了另一个测试字符串):


import itertools

def deduplicate(lst):
    elems = set()
    rest = []
    for el in lst:
        if el not in elems:
            elems.add(el)
            rest.append(el)
    return rest


def deduplicate_lists(lst_of_lists, differenet_lists=False):
    lst_of_lists.sort()
    if differenet_lists:
        return [lst for lst, _ in itertools.groupby(lst_of_lists)]
    else:
        return [*[lst for lst, _ in itertools.groupby(lst_of_lists)]]


def try_it_faster(lists_by_len_of_words, decrypt_dict):
    for key, val in lists_by_len_of_words.items():
        # print(ke, val)
        if len(val) == 1:
            for index, char in enumerate(val[0]):
                # print(char, index_for_word)
                decrypt_dict.update({char: key[index]})
    return decrypt_dict


def decrypt(string_to_decrypt, decrypt_dict):
    returner = None
    try:
        returner = "".join(decrypt_dict[i] for i in string_to_decrypt)
    except KeyError:
        pass
    return returner


def c_just_multiple_val_dict(dict, help_dict=None):
    copy_dict = dict.copy()
    for keys, values in copy_dict.items():
        # print(keys, values)
        if len(values) == 1:
            dict.pop(keys)

    if help_dict is not None:
        copy_dict = dict.copy()
        for word in copy_dict.keys():
            if all([char in help_dict.values() for char in word]):
                dict.pop(word)
    return dict


def move_items(lst):
    return lst[0:] + lst[0]


# timed
number = 6
inputs = ['all', 'and', 'apple', 'ass', 'gibble', 'horror', 'hound', 'love',
          'tie']  # input words default: "and", "dick", "jane", "puff", "spot", "yertle"
decrypt_dict = {' ': ' '}
encryption_as_string = "cvvli euynm dxggli euzzuz cnm bxi lupi cll cww"  # default: bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn
encryption_key = encryption_as_string.split()  # create list from encrypted string
lists_by_len_of_words = {inp: deduplicate([a for a in encryption_key if len(a) == len(inp)]) for inp in
                         inputs}  # dict key=input word, deduplicated value=words from encryption_key with same length as input word
# timed

# print start values
print(f"1. {inputs}")
print(f"2. {encryption_key}")
print(f"3. {lists_by_len_of_words}")
print(f"4. {decrypt_dict}")

decrypt_dict = try_it_faster(lists_by_len_of_words,
                             decrypt_dict)  # if len of input words is unique, this will make it faster
secondary_decrypt_dict = decrypt_dict.copy()
print(f"4. After try_it_faster(): {decrypt_dict}")

# same_len_word_list = [word for word in lists_by_len_of_words.keys()]
inputs_sorted = sorted(inputs, key=len)  # input sorted
inputs_as_string = " ".join(inputs_sorted)  # sorted input to string

encryption_key_deduplicated_sorted = sorted(deduplicate(encryption_key),
                                            key=len)  # encryption_key sorted and deduplicated
encryption_key_as_string = " ".join(encryption_key_deduplicated_sorted)  # sorted encryption_key to string

# print sorted and deduplicated start values
print(f"1. Sorted input words as string: {inputs_as_string}")
print(f"2. Sorted and deduplicated encrypted words as list: {encryption_key_deduplicated_sorted}")
print(f"2. Sorted and deduplicated encrypted words as string: {encryption_key_as_string}")

mover_check = len(encryption_key_deduplicated_sorted)#*len(encryption_key_deduplicated_sorted)
mover = 0
# while decrypt(encryption_key_as_string, secondary_decrypt_dict) != inputs_as_string:
while decrypt(encryption_key_as_string, secondary_decrypt_dict) != inputs_as_string and mover < mover_check:
    print(f"Before clearing:{encryption_key_deduplicated_sorted}")
    for index_of_word, input_word in enumerate(inputs_sorted):
        print(index_of_word, input_word,
              decrypt(encryption_key_deduplicated_sorted[index_of_word], secondary_decrypt_dict))
        if decrypt(encryption_key_deduplicated_sorted[index_of_word], secondary_decrypt_dict) == input_word:
            for index_of_char, char in enumerate(encryption_key_deduplicated_sorted[index_of_word]):
                # print(index_of_char, char)
                decrypt_dict[char] = input_word[index_of_char]
            print(f"pop out {inputs_sorted[index_of_word]} a {encryption_key_deduplicated_sorted[index_of_word]}")
            inputs_sorted.pop(index_of_word)
            encryption_key_deduplicated_sorted.pop(index_of_word)

    print(f"After clearing:{encryption_key_deduplicated_sorted}")
    print(f"Work wi      :{inputs_sorted}")
    secondary_decrypt_dict.clear()
    secondary_decrypt_dict.update(decrypt_dict)
    # secondary_decrypt_dict.update(decrypt_dict)

    # print(encryption_key_deduplicated_sorted)
    for index_for_word, encryption_word in enumerate(encryption_key_deduplicated_sorted):
        print(index_for_word, encryption_word)
        for index_for_char, letter in enumerate(encryption_word):
            # print(letter)
            if secondary_decrypt_dict.get(letter) is None:
                print(f"secondary_dict nor contain:{letter}")
                if decrypt_dict.
                try:
                    print("".join(char for char in inputs_sorted[index_for_word])[index_for_char])
                    secondary_decrypt_dict.update(
                        {letter: "".join(char for char in inputs_sorted[index_for_word])[index_for_char]})
                    setter = False
                    # print(secondary_decrypt_dict)
                except IndexError:
                    setter = True
            else:
                # pass
                setter = False
                print(f"Contains:{letter}")
    encryption_key_deduplicated_sorted = encryption_key_deduplicated_sorted[1:] + encryption_key_deduplicated_sorted[:1]
    if setter:
        sorted(encryption_key_deduplicated_sorted, key=len)
    print(decrypt(encryption_key_as_string, secondary_decrypt_dict))
    mover += 1

print(secondary_decrypt_dict)


print(decrypt_dict)
final_dict = {**secondary_decrypt_dict, **decrypt_dict}

print(final_dict)
str = "cvvli euynm acqyg dxggli euzzuz cnm bxi lupi cll cww"
if decrypt(str, final_dict) is None:
    ret_str = ""
    for char in encryption_key_as_string:
        if char != " ":
            ret_str.__add__(char)
        else:
            ret_str.__add__(" ")
    print(ret_str)
else:
    print(decrypt(str, final_dict))


标签: pythonalgorithmdictionaryencryptioncryptography

解决方案


回溯方法可用于查找字母映射的组合,该组合在逐个长度的基础上产生匹配的单词。

为了优化这种“试错”组合遍历,每个加密字母都被分配了它可以映射到的潜在明文字母,基于加密词与相同长度的明文词的配对。

knownWords = ["and", "dick", "jane", "puff", "spot", "yertle"]
encrypted  = "bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"

letterMap = dict()
for word in encrypted.split():
    for kw in knownWords:                 # matching words 
        if len(kw)!=len(word): continue   # of same length
        for k,e in zip(kw,word):                  
             letterMap.setdefault(e,set()).add(k) # letter to letter options

def decrypt(w,mapping): return "".join(mapping.get(c,c) for c in w)

from collections import deque
finalMap = None
mapQ     = deque([[dict(),set(letterMap),set()]]) # mapping,remaining,usedClear
while mapQ and not finalMap:
    mapping,remaining,used = mapQ.pop()           # DFS mapping to complete
    letter = remaining.pop()                      # pick a letter to map
    for clearLetter in letterMap[letter]-used:        # select clear letters
        newMap    = {**mapping,letter:clearLetter}    # enhance mapping
        if remaining:                                 # more letters to map
            mapQ.append([newMap, remaining.copy(), used | {clearLetter}]) 
            continue
        if all(decrypt(w,newMap) in knownWords for w in encrypted.split()):
            finalMap = newMap                         # found mapping that works
            break 

最后结果:

print(decrypt(encrypted, finalMap))
'dick and jane and puff and spot and yertle'

print(finalMap)
{'y': 'u', 'p': 'y', 'q': 'p', 't': 't', 'j': 'i', 'm': 'f', 'e': 'r',
 'f': 'l', 'g': 'k', 'h': 'j', 'a': 'o', 'v': 'c', 'n': 'e', 'x': 'a',
 'r': 's', 'b': 'd', 's': 'n'}

其他测试用例:

knownWords = ['all', 'and', 'apple', 'ass', 'gibble', 'horror', 'hound', 'love',
      'tie']
encrypted  = "cvvli euynm dxggli euzzuz cnm bxi lupi cll cww"

...

decrypt(encrypted, finalMap)
'apple hound gibble horror and tie love all ass'

decrypt("cvvli euynm acqyg dxggli euzzuz cnm bxi lupi cll cww",finalMap)
'apple hound aaqub gibble horror and tie love all ass'

请注意,我使用了严格的条件来退出搜索循环,在该循环中所有加密的单词都在已知单词列表中找到。这可以通过跟踪“最佳匹配”并让循环耗尽 mapQ 队列来放松。定义“最佳匹配”的含义将取决于您。它可以用单词数、不同单词数、字符覆盖率等来衡量。

[编辑]更有效的方法......

我尝试了上面的“最佳匹配”方法,发现处理时间会变得非常长。因此,我基于整个单词的映射而不是逐个字母的搜索制作了一个新的(递归)解决方案。这会产生更好的结果,并且对于部分匹配,管理起来也不太复杂:

def decrypt(w,mapping): return "".join(mapping.get(c,c) for c in w)

def findMapping(knownWords,encrypted):
    if not encrypted: return {} # no more encrypted words, stop recursion
    if isinstance(encrypted,str): encrypted = list(set(encrypted.split()))
    bestMap   = findMapping(knownWords,encrypted[1:]) # track best
    bestMatch = sum(decrypt(w,bestMap) in knownWords for w in encrypted)
    eWord     = encrypted[0] # map first encrypted word
    for kWord in knownWords:
        if len(eWord) != len(kWord): continue # with word of same length
        mapping = {e:k for e,k in zip(eWord,kWord)}  # letter to letter
        if decrypt(eWord,mapping) != kWord: continue # one to one only
        unused   = str.maketrans('','',kWord)  # removed used letters
        subWords = [*filter(None,(kw.translate(unused) for kw in knownWords))]
        unmapped = str.maketrans('','',eWord)  # remove mapped letters
        subCrypt = [*filter(None,(ew.translate(unmapped) for ew in encrypted))]
        mapping.update(findMapping(subWords,subCrypt)) # Recurse subwords
        match = sum(decrypt(w,mapping) in knownWords for w in encrypted)
        if match>bestMatch: bestMap,bestMatch = mapping,match  # best match
    return bestMap # return mapping with best match (number of mapped words)
                    
  • 该解决方案选择一个加密的单词并尝试将其映射到相同长度的已知单词。
  • 这建立了一个字母映射(必须是一对一的)。
  • 在进入下一个递归级别之前,来自该映射的字母会从加密词和已知词中删除。
  • 因此,下一级递归只有一个有限的映射来执行,它不会与第一个单词的字母映射重叠。
  • 对每个相同长度的已知单词执行此操作,并通过递归处理所有加密的单词,将产生从字母到字母的角度兼容的单词到单词映射。
  • 跟踪匹配词数最多的映射并将其返回给调用者。
  • 为了支持不完整的匹配,该解决方案从跳过第一个加密单词开始递归。这为选择最佳匹配建立了基线。

输出:

knownWords = ["and", "dick", "jane", "puff", "spot", "yertle"]
encrypted  = "bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"
mapping    = findMapping(knownWords,encrypted)
print(decrypt(encrypted,mapping))
'dick and jane and puff and spot and yertle'
print(mapping)
{'r': 's', 'q': 'p', 'a': 'o', 't': 't', 'y': 'u', 'm': 'f', 'b': 'd',
 'j': 'i', 'v': 'c', 'g': 'k', 'p': 'y', 'n': 'e', 'e': 'r', 'f': 'l',
 'h': 'j', 'x': 'a', 's': 'n'}


knownWords = "apple hound gibble horror and tie love all ass".split()
encrypted  = "cvvli euynm acqyg dxggli euzzuz cnm bxi lupi upilucnm"
mapping    = findMapping(knownWords,encrypted)
print(decrypt(encrypted,mapping))
'apple hound aaqub gibble horror and tie love oveloand'
print(mapping)
{'b': 't', 'x': 'i', 'i': 'e', 'l': 'l', 'u': 'o', 'p': 'v', 'c': 'a',
 'v': 'p', 'd': 'g', 'g': 'b', 'e': 'h', 'y': 'u', 'n': 'n', 'm': 'd',
 'z': 'r'}

推荐阅读