python - tkinter 显示错误：ValueError：无法将字符串转换为浮点数：

问题描述

当我运行此代码时，我收到错误。我正在尝试创建一个小应用程序来预测医疗状况是疾病还是非疾病。我在这里实现了 RandomForest。代码在 Jupyter Notebook 中执行时运行正常。但是，当我制作它的 .py 文件时，我在 clf.predict() 处遇到类型转换错误，我不明白为什么。这是我在显示错误的 .py 文件中使用的代码。

import tkinter as tk

root= tk.Tk()

canvas1 = tk.Canvas(root, width = 400, height = 600,  relief = 'raised')
canvas1.pack()

label1 = tk.Label(root, text='Predict the shit')
label1.config(font=('helvetica', 14))
canvas1.create_window(200, 25, window=label1)

label2 = tk.Label(root, text='Type G Value:')
label2.config(font=('helvetica', 10))
canvas1.create_window(200, 50, window=label2)

entry1 = tk.Entry (root) 
canvas1.create_window(200, 100, window=entry1)


label5 = tk.Label(root, text='Type H value:')
label5.config(font=('helvetica', 10))
canvas1.create_window(200, 140, window=label5)

entry2 = tk.Entry (root) 
canvas1.create_window(200, 160, window=entry2)

a1 = tk.IntVar()
a2 = tk.IntVar()

a1 = entry1.get()
a2 = entry2.get()
global lst1, lst2
lst1 = [a1]
lst2 = [a2]


def c1 ():

    import numpy as np
    import pandas as pd
    import os
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split
    from sklearn import preprocessing
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    
    data = pd.read_csv("C:/Users/kumar/Downloads/try/train_data.csv")
    df1 = data[['Blood_Pressure_Abnormality','Level_of_Hemoglobin','Genetic_Pedigree_Coefficient']].copy()
    x1= df1.values #returns a numpy array
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x1)
    df1 = pd.DataFrame(x_scaled,columns=df1.columns)

    median_genetic_pedigree_coeff = df1[['Genetic_Pedigree_Coefficient']].median()
    df1['Genetic_Pedigree_Coefficient'].fillna(median_genetic_pedigree_coeff[0],inplace = True)

    # Seprating the Data frame into input(x) and target(y)
    list1 = list(df1.columns)
    list1.remove('Blood_Pressure_Abnormality')
    x = df1[list1]
    y = df1[["Blood_Pressure_Abnormality"]]
    y = np.array(y)
    y = np.ravel(y)


    # Splitting the Data into train and Test 
    X_train, X_test, Y_train, Y_test= train_test_split(x, y, test_size=0.2,random_state=42)



    clf = RandomForestClassifier(bootstrap=True, class_weight=None,
                           criterion='gini', max_depth=11, max_features='auto',
                           max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_jobs=None, oob_score=False, random_state=42, verbose=0,
                           warm_start=False)


    clf.fit(X_train,Y_train)
    y_pred = clf.predict(X_test)
    print(type(X_test))
    print("With accuracy of ",accuracy_score(Y_test, y_pred))



    df_new = pd.DataFrame(list(zip(lst1, lst2)),
                   columns =['H_value', 'G_Coeff'])
    print(type(df_new))
    y_pred = clf.predict(df_new)
    print(y_pred)
    hh = y_pred[0]
    print(hh)

    if hh == 1:
        label4 = tk.Label(root, text= "Benign",font=('helvetica', 10, 'bold'))
        canvas1.create_window(200, 240, window=label4)
    else:
        label4 = tk.Label(root, text= "Malignant",font=('helvetica', 10, 'bold'))
        canvas1.create_window(200, 240, window=label4)

    
button1 = tk.Button(text='The entered case is', command=c1, bg='brown', fg='white', font=('helvetica', 9, 'bold'))
canvas1.create_window(200, 200, window=button1)

root.mainloop()

我看到如下错误：

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\kumar\Anaconda3\lib\tkinter\__init__.py", line 1705, in __call__
    return self.func(*args)
  File "<ipython-input-4-1bb627d3a044>", line 93, in c1
    y_pred = clf.predict(df_new)
  File "C:\Users\kumar\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py", line 545, in predict
    proba = self.predict_proba(X)
  File "C:\Users\kumar\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py", line 588, in predict_proba
    X = self._validate_X_predict(X)
  File "C:\Users\kumar\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py", line 359, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "C:\Users\kumar\Anaconda3\lib\site-packages\sklearn\tree\tree.py", line 391, in _validate_X_predict
    X = check_array(X, dtype=DTYPE, accept_sparse="csr")
  File "C:\Users\kumar\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 496, in check_array
    array = np.asarray(array, dtype=dtype, order=order)
  File "C:\Users\kumar\Anaconda3\lib\site-packages\numpy\core\numeric.py", line 538, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: could not convert string to float:

我是编码新手。你能帮我解决这个问题吗？当我在笔记本中运行与下面相同的代码时，它运行良好，没有任何错误。只有当我在 tkinter 中运行它时，我才会看到错误。

import numpy as np
import pandas as pd
import os
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


lst1 = [0.8]
lst2 = [0.5]
data = pd.read_csv("C:/Users/kumar/Downloads/try/train_data.csv")
df1 = data[['Blood_Pressure_Abnormality','Level_of_Hemoglobin','Genetic_Pedigree_Coefficient']].copy()
x1= df1.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x1)
df1 = pd.DataFrame(x_scaled,columns=df1.columns)

median_genetic_pedigree_coeff = df1[['Genetic_Pedigree_Coefficient']].median()
df1['Genetic_Pedigree_Coefficient'].fillna(median_genetic_pedigree_coeff[0],inplace = True)

# Seprating the Data frame into input(x) and target(y)
list1 = list(df1.columns)
list1.remove('Blood_Pressure_Abnormality')
x = df1[list1]
y = df1[["Blood_Pressure_Abnormality"]]
y = np.array(y)
y = np.ravel(y)


# Splitting the Data into train and Test 
X_train, X_test, Y_train, Y_test= train_test_split(x, y, test_size=0.2,random_state=42)



clf = RandomForestClassifier(bootstrap=True, class_weight=None,
                       criterion='gini', max_depth=11, max_features='auto',
                       max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)


clf.fit(X_train,Y_train)
y_pred = clf.predict(X_test)
print(type(X_test))
print("With accuracy of ",accuracy_score(Y_test, y_pred))



df_new = pd.DataFrame(list(zip(lst1, lst2)),
               columns =['H_value', 'G_Coeff'])
print(type(df_new))
y_pred = clf.predict(df_new)
print(y_pred)
hh = y_pred[0]
print(hh)

if hh == 1:
    print(type(hh))
else:
    print(hh)

标签： pythontkinterrandom-forest

python - tkinter 显示错误：ValueError：无法将字符串转换为浮点数：

问题描述

解决方案

推荐阅读