首页 > 解决方案 > Flask 应用程序:是否可以替代 csv 文件的使用

问题描述

我目前使用 CSV 文件保存用户输入,但我希望改用 Python 的模块(import)机制,把变量从一个文件传递到另一个文件。在下面的代码中,我会标出目前借助 CSV 把数据传递到另一个文件的位置。

这是 app.py

from flask import Flask, render_template, url_for, redirect, request
import pickle
import pandas as pd
import numpy as np
from forms import ticket_details
from pathlib import Path

app = Flask(__name__)

# NOTE(review): the secret key is hard-coded in the source; consider loading it
# from an environment variable or instance config before deploying.
app.config['SECRET_KEY'] = '21d58b5f23bcc9fc0a8759e261dddbae'

# Load the decision tree classifier that was saved earlier in the pickel-csv-files folder.
# The context manager closes the file handle as soon as the model has been unpickled.
# NOTE: unpickling can execute arbitrary code, so only load a model file you created yourself.
with open('pickel-csv-files/model-dtc.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

@app.route("/")
def home():
    """Render ``layout.html`` for the root URL."""
    landing_page = render_template('layout.html')
    return landing_page

@app.route("/overview")
def overview():
    """Render ``overview.html`` for the ``/overview`` URL."""
    overview_page = render_template('overview.html')
    return overview_page

@app.route("/prediction_results",  methods = ['GET', 'POST'])
def prediction_results():
    """Predict from the submitted ticket details and render the result page.

    On POST, the submitted form values are written to the user CSV, the
    ``notebook`` module is (re-)executed so that it reads and preprocesses that
    CSV, and the resulting ``user_inputs`` are passed to the loaded model.

    Any other request method has no submitted data to predict on, so the
    client is redirected to the ticket form instead.

    Returns:
        The rendered ``prediction_results.html`` template (with the prediction
        passed as ``predictionnn``) or a redirect to the form page.
    """
    if request.method != 'POST':
        # Without submitted data there is no prediction to show; rendering the
        # result template here would reference an unassigned variable.
        return redirect(url_for('form'))

    # Local imports keep this function self-contained.
    import importlib
    import sys

    form = ticket_details()
    # Column order matches what notebook.py selects from the CSV.
    data = {
        'Pclass': [form.pclass.data],
        'Sex': [form.sex.data],
        'Age': [form.age.data],
        'SibSp': [form.sibsp.data],
        'Parch': [form.parch.data],
        'Fare': [form.fare.data],
    }
    user_df = pd.DataFrame(data)
    # Save the user's inputs as a CSV file for notebook.py to pick up.
    user_df.to_csv('/Users/anildhage/Titanic-Data-Prediction/titanic-app/user-entry/user.csv')

    # notebook.py processes the CSV at import time. A plain ``import`` is cached
    # after the first request, which would leave every later request predicting
    # on the first user's data, so the module is reloaded explicitly.
    # NOTE(review): this re-runs all of notebook.py on every request and is not
    # safe under concurrent requests, since they share one CSV file.
    if 'notebook' in sys.modules:
        notebook = importlib.reload(sys.modules['notebook'])
    else:
        notebook = importlib.import_module('notebook')

    # Predict from the freshly preprocessed user inputs.
    predictionn = loaded_model.predict(notebook.user_inputs)
    return render_template('prediction_results.html', predictionnn = predictionn)

@app.route("/form", methods = ['GET', 'POST'])
def form():
    """Render ``ticket_form.html`` with a new ``ticket_details`` form instance."""
    template_context = {'title': 'Ticket Form', 'form': ticket_details()}
    return render_template('ticket_form.html', **template_context)

@app.route("/down_arrow")
def down_arrow():
    """Render ``down_arrow.html`` for the ``/down_arrow`` URL."""
    down_arrow_page = render_template('down_arrow.html')
    return down_arrow_page

@app.route("/facts")
def facts():
    """Render ``facts.html`` for the ``/facts`` URL."""
    facts_page = render_template('facts.html')
    return facts_page

@app.route("/prediction")
def prediction():
    """Render ``prediction.html`` for the ``/prediction`` URL."""
    prediction_page = render_template('prediction.html')
    return prediction_page

# Start the app with debug mode enabled, but only when this file is executed
# directly as a script (not when it is imported as a module).
if __name__ == '__main__':
    app.run(debug=True)

这是 notebook.py

import numpy as np
import pandas as pd

### The training-data processing below is done to capture the fitted label encoder and
### standard scaler, which are then reused to encode and scale the user inputs ###
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

TRAIN_CSV = '/Users/anildhage/Titanic-Data-Prediction/titanic-app/pickel-csv-files/train.csv'
USER_CSV = '/Users/anildhage/Titanic-Data-Prediction/titanic-app/user-entry/user.csv'
# Feature columns in the order the scaler and model are fitted on (train columns 1..6).
FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']

train = pd.read_csv(TRAIN_CSV)

# Drop the columns that are not required.
train.drop(columns=['PassengerId', 'Name','Ticket','Embarked','Cabin' ], axis=1, inplace=True)

# Remember the training mean age so user inputs with a missing age can be
# filled with the same value as the training data.
age_mean = train.Age.mean()
train = train.fillna(age_mean)

# Remove outliers from the Fare column. quantile(1) is the column maximum, so
# this drops only the rows holding the highest fare.
# NOTE(review): kept as-is so the fitted scaler stays consistent with the
# model that was presumably pickled from this same preprocessing - confirm
# before changing the threshold.
outliers_fare = train['Fare'].quantile(1)
train = train[train.Fare < outliers_fare]

# --- Training on the train dataset ---

x = train.iloc[:,1:7].values # we only need the data, not the index and the column

# The Sex column is categorical; LabelEncoder converts it into integers.
le = LabelEncoder()
x[:,1] = le.fit_transform(x[:,1])

# StandardScaler transforms the data so that each feature has mean 0 and
# standard deviation 1.
ss = StandardScaler()
x = ss.fit_transform(x)

# Dependent feature, ready for the model
y = train.iloc[:,0]

# Decision Tree Algorithm gives the best result for this dataset
classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(x,y) # model is fit

prediction_x = classifier.predict(x)

# Accuracy of the model on its own training data ('y' vs the model's output); the higher the better.
print(accuracy_score(y,prediction_x)) #if you run this program on your PC, terminal will print the accuracy score

### No need to run this code if pkl file is already saved in pickel-csv-files folder ###
# #import pickle
# #with open('model-dtc.pkl', 'wb') as files:
# #   pickle.dump(classifier, files)
# #   Save the file in the necessary folder for loading in app.py


def preprocess_user_inputs(raw_df: pd.DataFrame) -> np.ndarray:
    """Encode and scale user-entered passenger data like the training data.

    Args:
        raw_df: DataFrame containing at least the columns in
            ``FEATURE_COLUMNS``; ``Sex`` must hold labels that occurred in the
            training data.

    Returns:
        The scaled feature array produced by the fitted ``StandardScaler``,
        one row per input row.

    Raises:
        KeyError: if a required column is missing from ``raw_df``.
        ValueError: if ``Sex`` contains a label that was not seen in training.
    """
    # Enforce the training column order regardless of the input's column order.
    frame = raw_df[FEATURE_COLUMNS].copy()

    # A missing age is filled with the training mean; the mean of a single
    # user row would itself be NaN and fill nothing.
    frame['Age'] = frame['Age'].fillna(age_mean)

    # Set dtypes to match the values the model was fitted on.
    frame = frame.astype({'Pclass': int, 'Age': float, 'SibSp': int, 'Parch': int, 'Fare': float})

    features = frame.values
    # Use transform (not fit_transform): re-fitting the encoder on the user's
    # row would map whatever label was entered to 0 and overwrite the mapping
    # learned from the training data.
    features[:,1] = le.transform(features[:,1])

    # Scale the user inputs with the scaler fitted on the training data.
    return ss.transform(features)


# --- Load user inputs ---

# Import the user details that were saved in .csv format and apply the same
# preprocessing that was applied to the training dataset.
user_df = pd.read_csv(USER_CSV, usecols=FEATURE_COLUMNS)
user_inputs = preprocess_user_inputs(user_df)



如上所示,在 app.py 中,我把变量 'user_df' 保存为 CSV 文件,然后在 notebook.py 中打开同一个 CSV 文件进行处理。处理完成后,notebook.py 中得到 "user_inputs" 变量,我再把它导入回 app.py 做进一步处理(预测)。

我觉得这种做法对于构建机器学习应用来说效率不高。有时由于 CSV 的读取和处理方式,预测结果并不准确。我也尝试过直接用模块导入的方式来实现,但在这两个文件之间遇到了导入错误。

有人可以帮我找到出路吗,我真的很想在这里实现模块概念或更好的东西。

标签: python pandas machine-learning flask module

解决方案


推荐阅读