首页 > 解决方案 > 在云系统仿真(Python)上实现一个基本的 DHT 方法

问题描述

我正在尝试创建一个基本模拟,在其中生成假节点,连接它们并使用 DHT 方法为它们分配随机数量的进程。连接采用循环链表方式,我认为这种方式更容易实现。我正在使用以下函数来实现 DHT:

dc 是具有哈希表和有关下一个节点的信息的节点类

def closest_dataCenter(dc,dcNext,key):
 largestNode = hash_sha1(str(com_count))
 dc_id = hash_sha1(str(dc.id))
 dcNext_id = hash_sha1(str(dcNext.id))
 if(dc_id>dcNext_id):
    if(((largestNode)-dcNext_id-key) > (key-dc_id)):
        return dc
    else:
        return dcNext
else:
    if((key-dc_id) > (dcNext_id-key)):
        return dcNext
    else:
        return dc

def find_dataCenter(dc,key):
    current = dc
    current_id = hash_sha1(str(current.id))
    current_nextNode = hash_sha1(str(current.nextNode.id))
    while(not (current_id<=key<current_nextNode)):
        current_id = hash_sha1(str(current.id))
        current_nextNode = hash_sha1(str(current.nextNode.id))
        print("Key:" + str(key) + " \n")
        print("Current id: " + str(current.id) + " Current id hash: " + str(current_id))
        print("CurrentNext id: " + str(current.nextNode.id) + " CurrentNext id hash: " + str(current_nextNode))
        time.sleep(1)
        if(key>current_id>current_nextNode):
            break
        else:
            current = current.nextNode
    return closest_dataCenter(current,current.nextNode,key)


def store(start,key,value):
    dc = find_dataCenter(start,key)
    dc.hash_table[key]=value


def lookup(start,key):
    dc = find_dataCenter(start,key)
    return dc.hash_table[key]

在我的散列函数上,我使用 sha1 散列,然后在返回之前将十六进制转换为整数。我在想的是我会发送一个密钥和值。密钥将被散列并找到最近的节点 id 对,然后找到最近的节点。但我猜散列不会那样工作,因为当我散列某些键值时,他们有时找不到自己的位置并永远循环。例如:

    Current id: 1 Current id hash: 304942582444936629325699363757435820077590259883
CurrentNext id: 2 CurrentNext id hash: 1246245281287062843477446394631337292330716631216
Key:62051490369831458547456710601409275698631100567

Current id: 2 Current id hash: 1246245281287062843477446394631337292330716631216
CurrentNext id: 3 CurrentNext id hash: 684329801336223661356952546078269889038938702779
Key:62051490369831458547456710601409275698631100567

Current id: 3 Current id hash: 684329801336223661356952546078269889038938702779
CurrentNext id: 4 CurrentNext id hash: 156380102318965990264666286018191900590658905210
Key:62051490369831458547456710601409275698631100567

Current id: 4 Current id hash: 156380102318965990264666286018191900590658905210
CurrentNext id: 5 CurrentNext id hash: 983116577831777608312765670515538102764663892676
Key:62051490369831458547456710601409275698631100567

Current id: 5 Current id hash: 983116577831777608312765670515538102764663892676
CurrentNext id: 6 CurrentNext id hash: 1106827226057151132207397296477425838227048555128
Key:62051490369831458547456710601409275698631100567

Current id: 6 Current id hash: 1106827226057151132207397296477425838227048555128
CurrentNext id: 7 CurrentNext id hash: 823067872317374799613180678352776801958854168538
Key:62051490369831458547456710601409275698631100567

Current id: 7 Current id hash: 823067872317374799613180678352776801958854168538
CurrentNext id: 8 CurrentNext id hash: 1452173985408750203318475117189636062911214042143
Key:62051490369831458547456710601409275698631100567

Current id: 8 Current id hash: 1452173985408750203318475117189636062911214042143
CurrentNext id: 1 CurrentNext id hash: 304942582444936629325699363757435820077590259883
Key:62051490369831458547456710601409275698631100567

我是否遗漏了 DHT 方法的逻辑背后的某些东西,我应该编写自己的哈希函数还是不使用哈希?怎么可能找到离我的散列键最近的节点,如果所有内容都被散列,什么是最接近的,有可能知道吗?如果有人可以向我解释 DHT 方法背后的逻辑,我会很高兴,我该如何将其应用于我的问题?提前致谢。

如果需要完整代码:

import random
import time
import hashlib


com_count = random.randint(5,10)
process_count = random.randint(5,20)
###########################################################################################

class dataCenter:

    def __init__(self,id):
        self.nextNode = None
        self.id=id
        self.hash_table={}

class circularLinkedList:
    def __init__(self):
        self.head = None
    def push(self,id):
        ptr1 = dataCenter(id) 
        temp = self.head 

        ptr1.nextNode = self.head 

        if self.head is not None: 
            while(temp.nextNode != self.head): 
                temp = temp.nextNode 
            temp.nextNode = ptr1 

        else: 
            ptr1.nextNode = ptr1

        self.head = ptr1

    def printList(self): 
        temp = self.head 
        if self.head is not None: 
            while(True): 
                print("%d" %(temp.id)) 
                temp = temp.nextNode
                if (temp == self.head): 
                    break 

    def find_next(self,id):
        this_dc = self.head
        while True:
            if(this_dc.id == id):
                print("ID: " + str(this_dc.id) + " connected to " + str(this_dc.nextNode.id))
                break
            elif(this_dc.nextNode == self.head):
                return False
            this_dc = this_dc.nextNode
    def find(self,id):
        this_dc = self.head
        while True:
            if(this_dc.id == id):
                return this_dc
                break
            elif(this_dc.nextNode == self.head):
                return False
            this_dc = this_dc.nextNode

###########################################################################################

def print_connections_info(clist):
    print ("Number of Data Centers: "+str(com_count))
    for i in range(com_count+1):
        cList.find_next(i)


###########################################################################################

def create_dc(com_count):
    for i in range(com_count):
        cList.push(com_count-i)

###########################################################################################

def hash_sha1(x):
    hash_object = hashlib.sha1()
    hash_object.update(bytes(x, encoding='utf-8'))
    return int(hash_object.hexdigest(),16)

###########################################################################################

def closest_dataCenter(dc,dcNext,key):
    largestNode = hash_sha1(str(com_count))
    dc_id = hash_sha1(str(dc.id))
    dcNext_id = hash_sha1(str(dcNext.id))
    if(dc_id>dcNext_id):
        if(((largestNode)-dcNext_id-key) > (key-dc_id)):
            return dc
        else:
            return dcNext
    else:
        if((key-dc_id) > (dcNext_id-key)):
            return dcNext
        else:
            return dc
###########################################################################################

def find_dataCenter(dc,key):
    current = dc
    current_id = hash_sha1(str(current.id))
    current_nextNode = hash_sha1(str(current.nextNode.id))
    while(not (current_id<=key<current_nextNode)):
        current_id = hash_sha1(str(current.id))
        current_nextNode = hash_sha1(str(current.nextNode.id))
        print("Key:" + str(key) + " \n")
        print("Current id: " + str(current.id) + " Current id hash: " + str(current_id))
        print("CurrentNext id: " + str(current.nextNode.id) + " CurrentNext id hash: " + str(current_nextNode))
        time.sleep(1)
        if(key>current_id>current_nextNode):
            break
        else:
            current = current.nextNode
    return closest_dataCenter(current,current.nextNode,key)

###########################################################################################

def store(start,key,value):
    dc = find_dataCenter(start,key)
    dc.hash_table[key]=value

###########################################################################################

def lookup(start,key):
    dc = find_dataCenter(start,key)
    return dc.hash_table[key]

###########################################################################################

def create_process(pc_count):
    li = []
    for i in range(pc_count):
        li.append("Process " + str(i))
    return li

###########################################################################################


cList = circularLinkedList()
process_list = create_process(process_count)
create_dc(com_count)
print_connections_info(cList)

for i in range(len(process_list)):
    store(cList.find(1),hash_sha1(str(i)),process_list[i])
    print(cList.find(1))
    print(hash_sha1(str(i)))
    print(process_list[i])




print("**********************")

标签: pythoncloudsimulationdistributed-computingdht

解决方案


我解决了更改此行的问题:

if(key>=current.id>current.nextNode.id):
            break

它永远循环,因为当前 id 有时等于 key。


推荐阅读