首页 > 解决方案 > 将代码转换成 Python 类时出现 IndexError

问题描述

我正在尝试将代码转换为 python 类。

我的数据框“URM”看起来像:

   user_id  anime_id user_rating
    0   1   20  7.808497
    1   3   20  8.000000
    2   5   20  6.000000
    3   6   20  7.808497
    4   10  20  7.808497

出于计算量的考虑,我只筛选保留了 999 个用户。

如果我把下面的所有代码去掉 `self` 后直接复制粘贴运行(即不放在类里),它工作正常。

当我试图把它改写成一个类时

像那样

参数

`class ALS: 

    def __init__(self, URM, n_factors, lambda_ , n_iterations) :
        self.URM = URM 
        self.n_factors = n_factors 
        self.lambda_ = lambda_
        self.n_iterations = n_iterations
        self.n  = max(self.URM["anime_id"])
        self.m = max(self.URM["user_id"])

用于规范化 df、度量和计算训练矩阵的函数

    def normaliseRow(self, x): 
        return x / sum(x)

    def initialiseMatrix(self, n, n_factors):
        A = abs(np.random.randn(self.n, self.n_factors))
        return np.apply_along_axis(self.normaliseRow, 1, A)

    def ratingsPred(X, Y):
        return np.dot(X, Y.T)

    def MSE(self, ratingsPred, ratingsMatrix):
        idx = ratingsMatrix > 0
        return sum((ratingsPred[idx] - ratingsMatrix[idx]) ** 2) / np.count_nonzero(ratingsMatrix)

    def compute_matrix(self) :
        Y = self.initialiseMatrix(self.n, self.n_factors)
        X = self.initialiseMatrix(self.m, self.n_factors)
        temp =  np.zeros((n, 3)) #user_id, anime_id rating_user
        for i in range(1,n): 
            temp[i,] =  [m+1,i,0]
        COL_NAME = ["user_id","anime_id", "user_rating"]
        RM = self.URM.append(pd.DataFrame(temp, columns =COL_NAME))
        RM  = RM.pivot_table(columns=['anime_id'], index =['user_id'], 
                             values='user_rating', dropna = False)
        RM = RM.fillna(0).as_matrix()
        ratingsMatrix = RM[0:self.m,0:self.n]
        nonZero = ratingsMatrix > 0
        reg = lambda_ * np.eye(n_factors,n_factors)
        return X, Y, ratingsMatrix, nonZero, reg

训练交替最小二乘法

    def train(self) : 
        X, Y, ratingsMatrix, nonZero, reg = self.compute_matrix()
        print("start training ...")
        training_process = []

        for k in range(1, self.n_iterations):
            for i in range(1, self.m):
                idx = nonZero[i,:]
                a = Y[idx,]
                b = np.dot(np.transpose(Y[idx,]), ratingsMatrix[i, idx])
                updateX = np.linalg.solve(np.dot(np.transpose(a), a) + reg, b)
                X[i,] = updateX
            for j in range(1, self.n):
                idx = nonZero[:,j]
                a = X[idx,]
                b = np.dot(np.transpose(X[idx,]), ratingsMatrix[idx, j])
                updateY = np.linalg.solve(np.dot(np.transpose(a), a) + reg, b)
                Y[j,] = updateY
        ratingsP = self.ratingsPred(X, Y)
        mse = self.MSE(ratingsP, ratingsMatrix)
        training_process.append((k, mse))
        if (k+1) % 5 == 0:
            print("Iteration: %d ; mse = %.4f" % (k+1, mse))
            return training_process

开始训练

   # df, n_factors, lambda, iteration
    als_model =  ALS(urm, 15, 0.1, 10) `
    als_mode.train()

它返回一个 IndexError

    ---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-130-ce2849580784> in <module>()
----> 1 als_model.train()

<ipython-input-129-14f3d633244d> in train(self)
     53             for j in range(1, self.n):
     54                 idx = nonZero[:,j]
---> 55                 a = X[idx,]
     56                 b = np.dot(np.transpose(X[idx,]), ratingsMatrix[idx, j])
     57                 updateY = np.linalg.solve(np.dot(np.transpose(a), a) + reg, b)

IndexError: boolean index did not match indexed array along dimension 0; dimension is 34240 but corresponding boolean dimension is 999

标签: python, pandas, class, numpy, index-error

解决方案


推荐阅读