首页 > 解决方案 > python中的凝聚聚类实现

问题描述

我尝试用 Python 实现凝聚聚类。代码在某些数据集上可以正常运行,但在另一些数据集上会中途卡住。请帮我找出其中的错误。另外,这段代码是按照单链接(single-linkage)方法实现的;如果您有其他实现方式,也欢迎提出。

# Example data set: 26 rows of (1-based running index, measured value).
X = np.array(
    [
        [1.0, 60.7],
        [2.0, 45.8],
        [3.0, 64.8],
        [4.0, 40.8],
        [5.0, 72.8],
        [6.0, 54.8],
        [7.0, 77.7],
        [8.0, 42.3],
        [9.0, 39.0],
        [10.0, 59.8],
        [11.0, 67.5],
        [12.0, 78.3],
        [13.0, 43.5],
        [14.0, 47.2],
        [15.0, 55.8],
        [16.0, 62.7],
        [17.0, 30.7],
        [18.0, 48.2],
        [19.0, 32.5],
        [20.0, 73.5],
        [21.0, 39.2],
        [22.0, 48.5],
        [23.0, 62.8],
        [24.0, 42.2],
        [25.0, 64.0],
        [26.0, 26.3],
    ]
)
def calculate_distance(samples, diagonal_fill=10**4):
    """Build the square matrix of pairwise distances between samples.

    For ``i != j`` entry ``[i, j]`` is
    ``float(distance_calculation(samples[i], samples[j]))``.  Every diagonal
    entry is set to ``diagonal_fill``.

    Args:
        samples: Sized, integer-indexable collection.  Each item is handed
            unchanged to ``distance_calculation``.
        diagonal_fill: Value written on the diagonal.  Defaults to ``10**4``,
            the value that used to be hard-coded.
            NOTE(review): presumably the diagonal acts as a "never pick
            yourself" sentinel for a later arg-min search; if so, pass
            ``float("inf")`` whenever real distances can exceed ``10**4``.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(samples), len(samples))``.
    """
    size = len(samples)
    distance_matrix = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            if i == j:
                distance_matrix[i, j] = diagonal_fill
            else:
                distance_matrix[i, j] = float(
                    distance_calculation(samples[i], samples[j])
                )
    return distance_matrix
   
def distance_calculation(sample1, sample2):
    """Return the smallest distance between an item of each sample.

    Every item of ``sample1`` is paired with every item of ``sample2``.  The
    distance of a pair is ``np.linalg.norm`` of the difference of the two
    items converted to arrays; when that computation fails (for example
    because the items cannot be turned into compatible arrays) the pair is
    delegated to ``sampledistance``.

    Args:
        sample1: Sized iterable of items (points or nested groups of points).
        sample2: Sized iterable of items, same conventions as ``sample1``.

    Returns:
        The minimum over all pair distances.

    Raises:
        ValueError: If either sample is empty, so there is no pair to compare.
    """
    distances = []
    for item1 in sample1:
        for item2 in sample2:
            try:
                gap = np.linalg.norm(np.array(item1) - np.array(item2))
            except Exception:
                # Deliberately broad (NumPy raises different error types
                # across versions for mismatched nesting), but no longer a
                # bare ``except`` so KeyboardInterrupt/SystemExit propagate.
                gap = sampledistance(item1, item2)
            distances.append(gap)
    if not distances:
        raise ValueError("distance_calculation() needs two non-empty samples")
    return min(distances)
def _leaf_points(node):
    """Yield every flat coordinate vector nested anywhere inside ``node``."""
    containers = (list, tuple, np.ndarray)
    if isinstance(node, containers):
        if len(node) == 0:
            return
        if isinstance(node[0], containers):
            # ``node`` groups further points/clusters: descend into each.
            for child in node:
                yield from _leaf_points(child)
            return
    yield node


def sampledistance(sam1, sam2):
    """Return the single-linkage distance between two nested samples.

    Each argument may be one flat point (``[x, y]``), a list of points, or
    lists of points nested to any depth.  Both are flattened to their leaf
    points and the smallest Euclidean distance between a leaf of ``sam1`` and
    a leaf of ``sam2`` is returned.

    The earlier implementation chose between two code paths with
    ``str(type(...) != "<class 'list'>")``; because the comparison sat
    inside ``str()`` the result was always truthy, so flat points were
    compared coordinate-by-scalar and the result could be smaller than any
    real point-to-point distance.  Flattening first removes the need for
    that dispatch altogether.

    Args:
        sam1: A point or (nested) collection of points.
        sam2: A point or (nested) collection of points of the same dimension.

    Returns:
        The minimum Euclidean distance between any leaf point of ``sam1`` and
        any leaf point of ``sam2``.

    Raises:
        ValueError: If either argument contains no points.
    """
    points1 = [np.array(p, dtype=float) for p in _leaf_points(sam1)]
    points2 = [np.array(p, dtype=float) for p in _leaf_points(sam2)]
    if not points1 or not points2:
        raise ValueError("sampledistance() needs at least one point per sample")
    return min(np.linalg.norm(p - q) for p in points1 for q in points2)
def cluster_distance(clu, sample):
    """Return the smallest Euclidean distance from a cluster to a sample.

    Args:
        clu: Sequence of points forming the cluster.
        sample: Either one flat point (``[x, y]``) or a sequence of points.

    Returns:
        The minimum of ``np.linalg.norm(p - q)`` over every point ``p`` of
        ``clu`` and every point ``q`` of ``sample``.

    Raises:
        ValueError: If ``clu`` is empty (``min`` of an empty list).
    """
    # Wrap a lone point so the loops below always iterate over points.  The
    # previous test compared ``sample[0]`` itself with the text
    # "<class 'list'>", which wrapped *every* sample; a multi-point sample
    # was then subtracted as one 2-D block and produced a Frobenius norm
    # instead of a per-point minimum.
    if not isinstance(sample[0], (list, tuple, np.ndarray)):
        sample = [sample]
    gaps = []
    for member in clu:
        for point in sample:
            gaps.append(np.linalg.norm(np.array(member) - np.array(point)))
    return min(gaps)

标签: python, jupyter, cluster-analysis, hierarchical-clustering

解决方案


推荐阅读