python - Flask 应用程序:是否可以替代 csv 文件的使用
问题描述
我目前用 CSV 文件保存用户输入,但我希望改用 Python 的模块导入机制,在两个文件之间直接传递变量。在下面的代码中,我用注释标出了借助 CSV 在文件之间传递数据的位置。
这是 app.py
from flask import Flask, render_template, url_for, redirect, request
import pickle
import pandas as pd
import numpy as np
from forms import ticket_details
from pathlib import Path
app = Flask(__name__)
# NOTE(review): the secret key is hard-coded in source. For anything beyond
# local development it should be supplied from outside the repository.
app.config['SECRET_KEY'] = '21d58b5f23bcc9fc0a8759e261dddbae'

# Load the decision tree classifier that was pickled earlier into the
# pickel-csv-files folder. The path is relative, so it is resolved against
# the directory the app is started from.
# NOTE(review): pickle executes arbitrary code on load; only ever load a
# model file that this project produced itself.
with open('pickel-csv-files/model-dtc.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
@app.route("/")
def home():
    """Serve the site's root URL by rendering the base layout template."""
    return render_template("layout.html")
@app.route("/overview")
def overview():
    """Render the overview page."""
    return render_template("overview.html")
@app.route("/prediction_results", methods = ['GET', 'POST'])
def prediction_results():
    """Score the submitted ticket details and render the prediction page.

    On POST the form values are written to the shared user CSV, notebook.py
    is (re-)executed so that it preprocesses that CSV, and the resulting
    ``user_inputs`` array is passed to the pickled classifier.

    Any other method has no form data to score, so the visitor is sent to
    the ticket form instead.

    Returns:
        The rendered ``prediction_results.html`` page (with the prediction
        exposed to the template as ``predictionnn``), or a redirect to the
        ``form`` view.
    """
    # Imported locally so this view does not depend on the file's import block.
    import importlib
    import sys

    if request.method != 'POST':
        return redirect(url_for('form'))

    ticket_form = ticket_details()

    # One-row frame; the keys are the column names notebook.py reads back.
    data = {
        'Pclass': [ticket_form.pclass.data],
        'Sex': [ticket_form.sex.data],
        'Age': [ticket_form.age.data],
        'SibSp': [ticket_form.sibsp.data],
        'Parch': [ticket_form.parch.data],
        'Fare': [ticket_form.fare.data],
    }
    user_df = pd.DataFrame(data)

    # Hand the input over to notebook.py through the CSV file it reads.
    user_df.to_csv('/Users/anildhage/Titanic-Data-Prediction/titanic-app/user-entry/user.csv')

    # notebook.py does its work at import time. A plain
    # `from notebook import user_inputs` only executes the module on the
    # first import; afterwards Python returns the cached module, so every
    # later request would be scored with the first user's data. Reloading
    # forces the module to read the CSV that was just written.
    # NOTE(review): each reload also re-runs the training code in
    # notebook.py. Moving the preprocessing into a function there that takes
    # the DataFrame directly would remove both the CSV and the reload.
    if 'notebook' in sys.modules:
        notebook = importlib.reload(sys.modules['notebook'])
    else:
        notebook = importlib.import_module('notebook')

    predictionn = loaded_model.predict(notebook.user_inputs)
    return render_template('prediction_results.html', predictionnn = predictionn)
@app.route("/form", methods = ['GET', 'POST'])
def form():
    """Render the ticket form page with a fresh ``ticket_details`` form."""
    ticket_form = ticket_details()
    return render_template(
        'ticket_form.html',
        title='Ticket Form',
        form=ticket_form,
    )
@app.route("/down_arrow")
def down_arrow():
    """Render the down-arrow page."""
    return render_template("down_arrow.html")
@app.route("/facts")
def facts():
    """Render the facts page."""
    return render_template("facts.html")
@app.route("/prediction")
def prediction():
    """Render the prediction landing page."""
    return render_template("prediction.html")
# Start Flask's built-in development server only when this file is run
# directly (not when it is imported).
# NOTE(review): debug=True turns on the interactive debugger and the
# auto-reloader; it should be switched off for any real deployment.
if __name__ == '__main__':
    app.run(debug=True)
这是 notebook.py
import numpy as np
import pandas as pd
### Training pipeline. It is re-run here so that the fitted label encoder (le)
### and standard scaler (ss) are available for scaling the user's input below.
### Everything in this file executes at import time.
train = pd.read_csv('/Users/anildhage/Titanic-Data-Prediction/titanic-app/pickel-csv-files/train.csv')
# Drop the columns that are not used as features
train.drop(columns=['PassengerId', 'Name','Ticket','Embarked','Cabin' ], axis=1, inplace=True)
# Replace missing values with the mean age. fillna is applied to the whole
# frame, so a NaN in any remaining column would also become the mean age.
# NOTE(review): presumably only Age contains NaN at this point - confirm.
train = train.fillna(train.Age.mean())
# "Outlier" filter on Fare.
# NOTE(review): quantile(1) is the maximum of the column, so the strict `<`
# below removes only the rows whose Fare equals that maximum. A lower
# quantile may have been intended, but changing it would change the fitted
# scaler and therefore requires re-training and re-pickling the model.
outliers_fare = train['Fare'].quantile(1)
train = train[train.Fare < outliers_fare]
"""# Training the train dataset"""
# Feature matrix: columns 1..6 of the cleaned frame as a plain array
# (presumably Pclass, Sex, Age, SibSp, Parch, Fare - the same order app.py
# writes to the user CSV; confirm against train.csv).
x = train.iloc[:,1:7].values # values only, without the index and column labels
# Column 1 (Sex) is categorical; LabelEncoder maps each distinct label to an
# integer code.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
x[:,1] = le.fit_transform(x[:,1])
# StandardScaler rescales every feature to zero mean and unit standard
# deviation. The fitted scaler is reused later to scale the user's input.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
x = ss.fit_transform(x)
# Target: column 0 of the cleaned frame (presumably Survived - confirm)
y = train.iloc[:,0]
""" Decision Tree Algorithm gives the best result for this dataset"""
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(x,y) # model is fit
prediction_x = classifier.predict(x)
# accuracy_score compares the true labels with the model's predictions
from sklearn.metrics import accuracy_score,confusion_matrix
# The model is scored on the same rows it was trained on, so this is the
# training accuracy, not an estimate of accuracy on unseen data.
print(accuracy_score(y,prediction_x)) # printed to the terminal every time this module is executed
### How the pickle loaded by app.py was produced. There is no need to run
### this again if the .pkl file is already saved in the pickel-csv-files folder:
# #import pickle
# #with open('model-dtc.pkl', 'wb') as files:
# # pickle.dump(classifier, files)
# # Save the file in the necessary folder for loading in app.py
# """Load user inputs"""
# Read the user's details that app.py saved as CSV. usecols keeps only the
# six feature columns and skips the index column that to_csv wrote.
user_df = pd.read_csv('/Users/anildhage/Titanic-Data-Prediction/titanic-app/user-entry/user.csv', usecols=['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare'])

# Impute a missing age with the mean age of the training data. The mean of
# the user's own Age column cannot be used: the file holds a single row, so
# when that age is missing the mean is NaN as well.
# Other missing fields are deliberately not filled with an age value; they
# surface as errors in the casts below instead of producing a bogus row.
user_df['Age'] = user_df['Age'].fillna(train.Age.mean())

# Set the dtypes to match what the model was trained on
user_df.Pclass = user_df.Pclass.astype(int)
user_df.Age = user_df.Age.astype(float)
user_df.SibSp = user_df.SibSp.astype(int)
user_df.Parch = user_df.Parch.astype(int)
user_df.Fare = user_df.Fare.astype(float)

# Label-encode the Sex column (index 1) with the encoder fitted on the
# training data. This must be transform(), not fit_transform(): re-fitting
# on the single user row would assign code 0 to whatever value it contains,
# so the encoding would no longer match the one the model was trained with.
# transform() raises ValueError for a label that was not seen in training.
user_df = user_df.values
user_df[:,1] = le.transform(user_df[:,1])

# Scale the user's row with the scaler fitted on the training data.
# app.py imports this name and passes it to the pickled model.
user_inputs = ss.transform(user_df)
如上所示,我在 app.py 中把变量 'user_df' 保存为 CSV 文件,然后在 notebook.py 中打开同一个 CSV 文件进行处理。处理后得到的 “user_inputs” 变量再被导入回 app.py,用于后续的预测。
我发现这种做法在构建机器学习应用时效率不高。有时由于 CSV 的读取和处理方式,预测结果并不准确。我尝试过改用模块导入的方式,但在这两个文件之间遇到了导入错误。
有人能帮我找到解决办法吗?我非常希望在这里用模块导入(或其他更好的方式)来实现。
解决方案
推荐阅读
- python - PermissionError:[WinError 32] 该进程无法访问该文件,因为它正在被另一个进程使用:'temp/edit.jpg'
- javascript - 用于 react-beautiful-dnd 抛出错误构建 Droppable 的寓言绑定
- angular - 如何正确显示嵌套视图?
- javascript - 使用 Azure AD 和 SPA 获取令牌失败
- azure - Azure Devops - 测试运行管理:恢复正在进行的测试
- python - 并发 Selenium Webscraping 与 ThreadPoolExecutor 错误:无法运行单击和解析
- python - 按索引和列组合两个 pd df
- python - Django - 日期时间字段过期时更新字段
- solidity - 我怎样才能通过solidity从用户那里获得LINK资助的代币?
- python - 按定义间隔中的 bp 位置值对行进行分组或合并,并将它们的计数相加(在 R 或 python 中)