首页 > 解决方案 > Pickle 不保存对象的实例

问题描述

我有一个自定义模型collaborative.py,它包含以下代码

import numpy as np 
import pandas as pd 
import pickle

class LatentCollaborativeFiltering():

    def __init__(self, numberOfRecommendations, stepSize, lamda, gamma, K):
        self.ratings_data = pd.read_csv('../data/ratings.csv')
        self.movies_data = pd.read_csv('../data/movies.csv')
        self.numberOfRecommendations = numberOfRecommendations
        self.steps = stepSize
        self.lamda = lamda 
        self.gamma = gamma 
        self.K = K
        self.R = self.read_data()
        self.N = len(self.R.index)
        self.M = len(self.R.columns)
        self.P = pd.DataFrame(np.random.rand(self.N, self.K), index = self.R.index)
        self.Q = pd.DataFrame(np.random.rand(self.M, self.K), index = self.R.columns)
        print("Csv file read successful and R matrix formed")
        self.MatrixFactorization() 

    def read_data(self):
        data = pd.merge(self.ratings_data, self.movies_data, left_on="movieId", right_on="movieId")
        data=pd.DataFrame.sort_values(data,['userId','movieId'],ascending=[0,1])
        R = pd.pivot_table(data, values='rating', index=['userId'], columns = ['movieId'])
        print("User Item Rating Matrix Returned")
        return R

    def MatrixFactorization(self):
        self.R = self.R.iloc[:10, :10]
        for step in range(self.steps):
            for i in self.R.index:
                for j in self.R.columns:
                    if self.R.loc[i,j]>0:
                        # For each rating that exists in the training set 
                        eij=self.R.loc[i,j]-np.dot(self.P.loc[i],self.Q.loc[j])
                        self.P.loc[i]=self.P.loc[i]+self.gamma*(eij*self.Q.loc[j]-self.lamda*self.P.loc[i])
                        self.Q.loc[j]=self.Q.loc[j]+self.gamma*(eij*self.P.loc[i]-self.lamda*self.Q.loc[j])
            e=0
            for i in self.R.index:
                for j in self.R.columns:
                    if self.R.loc[i,j]>0:
                        #Sum of squares of the errors in the rating
                        e= e + pow(self.R.loc[i,j]-np.dot(self.P.loc[i],self.Q.loc[j]),2)+self.lamda*(pow(np.linalg.norm(self.P.loc[i]),2)+pow(np.linalg.norm(self.Q.loc[j]),2))
                        print("Error", e)
            if e<0.001:
                print(e)
                break
            print(step)
        return self.P,self.Q


    def predict(self, activeUser):
        self.predictItemRating=pd.DataFrame(np.dot(self.P.loc[activeUser],self.Q.T),index=self.Q.index,columns=['Rating'])
        self.topRecommendations=pd.DataFrame.sort_values(self.predictItemRating,['Rating'],ascending=[0])[:3]
        self.topRecommendationTitles=self.movies_data.loc[self.movies_data.movieId.isin(self.topRecommendations.index)]
        return list(self.topRecommendationTitles.title)

myobj = LatentCollaborativeFiltering(numberOfRecommendations=10, stepSize=100, lamda=0.02, gamma=0.001, K=2)
#result = myobj.predict(5)
#print(result)

with open('colla.pkl', 'wb') as outfile:
    my_model = pickle.dump(myobj, outfile, pickle.HIGHEST_PROTOCOL)

我创建了一个类的 myobj 对象并将该对象转储为 pickle 文件。但是当我加载泡菜文件时,对象不会从先前的状态加载并再次执行所有计算。我想要的是在对象初始化时只执行一次计算,而不是一次又一次地调用类中的预测函数。任何建议都会非常有帮助。我加载腌制对象的脚本是

import pickle 
from collaborative import LatentCollaborativeFiltering

with open('colla.pkl', 'rb') as infile:
    myObj = pickle.load(infile)

print(myObj.predict(5))

在这里,当运行这个脚本时,矩阵分解的整个过程再次发生。

标签: pythonpython-3.xmachine-learningdeep-learningpickle

解决方案


推荐阅读