python - 使用逻辑回归的泰坦尼克号机器学习问题
问题描述
我是一名有抱负的数据科学家。我偶然发现了泰坦尼克号数据集。我尝试使用逻辑回归来解决这个问题。但是,我在尝试将逻辑回归模型拟合到训练集时遇到了困难。下面是我的代码:
#importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#importing the dataset
# Both CSV files are read and immediately converted to plain arrays via `.values`.
# NOTE(review): for a frame with mixed column types this presumably yields an
# object-dtype array, so the numeric dtypes inferred by read_csv are lost - confirm.
Titanic_train = pd.read_csv('train.csv').values
Titanic_test = pd.read_csv('test.csv').values
# The column names are re-attached by hand when the training array is wrapped
# back into a DataFrame.
columns = ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
Titanic_train = pd.DataFrame(Titanic_train, columns = columns )
#splitting the training data into dependent and independent variable
# These two array-valued assignments are overwritten by the DataFrame versions
# on the next two lines.
X = Titanic_train.loc[:,['Pclass', 'Sex','Age','SibSp','Parch','Fare']].values
Y = Titanic_train.loc[:, 'Survived'].values
# X holds the six feature columns; Y is a one-column DataFrame holding 'Survived'
# (a column vector, which is what the DataConversionWarning in the traceback refers to).
X = pd.DataFrame(Titanic_train, columns = ['Pclass', 'Sex','Age','SibSp','Parch','Fare'])
Y = pd.DataFrame(Titanic_train, columns = ['Survived'])
#working with missing data
# Replace missing 'Age' entries with the mean of that column.
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0)
imputer = imputer.fit(X[['Age']])
X[['Age']] = imputer.transform(X[['Age']])
#dealing with categorical data
# Encode the 'Sex' values as integer labels. OneHotEncoder is imported but not used here.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
LabelEncoder_X = LabelEncoder()
X['Sex'] = LabelEncoder_X.fit_transform(X['Sex'])
# Hold out 40% of the rows as a test split; random_state = 0 is passed for a repeatable split.
from sklearn.cross_validation import train_test_split
X_train, X_test, Y_train, y_test = train_test_split(X,Y,test_size = 0.4, random_state = 0)
# Fit a logistic regression classifier on the training split.
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
# This fit call is where the reported ValueError ("Unknown label type: 'unknown'") is raised.
classifier.fit(X_train, Y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)
这是我不断收到的错误:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
Traceback (most recent call last):
File "<ipython-input-196-c1f2228de316>", line 3, in <module>
classifier.fit(X_train, Y_train)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py", line 1217, in fit
check_classification_targets(y)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 172, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'unknown'
如何修复此错误?
解决方案
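您需要将标签列 `Y.Survived` 转换为 `float` 类型。由于数据先通过 `.values` 转成 NumPy 数组、再重新构建为 DataFrame,所有列都变成了 `object` 类型,scikit-learn 因而无法识别标签的类型(即报错 `Unknown label type: 'unknown'`)。以下代码可以正常运行: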
# Logistic regression on the Titanic training data.
# The fix for "Unknown label type: 'unknown'" is the explicit cast of the label
# column to float further down; everything else mirrors the original script.
import pandas as pd

#importing the dataset
# Going through `.values` and back into a DataFrame leaves every column with
# dtype `object`, which is why the label column has to be cast explicitly below.
Titanic_train = pd.read_csv('train.csv').values
Titanic_test = pd.read_csv('test.csv').values
columns = ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
Titanic_train = pd.DataFrame(Titanic_train, columns = columns )

#splitting the training data into dependent and independent variable
X = pd.DataFrame(Titanic_train, columns = ['Pclass', 'Sex','Age','SibSp','Parch','Fare'])
Y = pd.DataFrame(Titanic_train, columns = ['Survived'])
# Selecting the single column gives a 1-D Series (no column-vector warning) and
# the cast gives scikit-learn a numeric label type it can recognise.
Y = Y.Survived.astype("float")

#working with missing data
# `Imputer` was removed from sklearn.preprocessing in scikit-learn 0.22;
# `SimpleImputer` (available from 0.20) is its replacement. Fall back to the
# old class so the script still runs on older installations.
try:
    from sklearn.impute import SimpleImputer
except ImportError:
    from sklearn.preprocessing import Imputer
    imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0)
else:
    imputer = SimpleImputer(strategy = 'mean')
# Replace missing 'Age' entries with the mean of that column.
imputer = imputer.fit(X[['Age']])
X[['Age']] = imputer.transform(X[['Age']])

#dealing with categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
LabelEncoder_X = LabelEncoder()
X['Sex'] = LabelEncoder_X.fit_transform(X['Sex'])

# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` lives in `sklearn.model_selection` (available from 0.18).
from sklearn.model_selection import train_test_split
# Hold out 40% of the rows as a test split; random_state = 0 keeps the split repeatable.
X_train, X_test, Y_train, y_test = train_test_split(X,Y,test_size = 0.4, random_state = 0)

from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, Y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)
关键在于这一行:
Y = Y.Survived.astype("float")
推荐阅读
- php - Update database when user is no more online
- sql - 使用 SQL Server 2016 SPLIT_STRING
- python - Networkx - 找到加权网络中出现次数最多的路径
- php - 作为函数的结果,php将关联数组放置在正常数组中
- php - 如何在不知道 $row 像 phpmyadmin 的情况下将 mysql 表数据输出到 html 表中
- android - 错误:包 rx.android.schedulers 不存在
- docker - 使用 docker compose 运行 Apache NIFI
- mysql - Mysql - 通过保留以前的数据来合并两个表
- android - 为不同操作系统版本覆盖 values-night 的正确方法
- amazon-web-services - 使用 CloudFront 部署在 S3 上的 VueJS 应用程序的“指定的密钥不存在”