首页 > 解决方案 > Python SVM 混淆图输出

问题描述

问题

我正在尝试建立一个用于新颖性检测的一类 SVM 模型。我对 ML 相当陌生,似乎找不到与我现在遇到的问题类似的问题。我已经生成了一些随机数据,一些要训练,一些要测试,一些有保证的异常值。

代码

%matplotlib inline
import numpy as np
import pandas as pd
import array
from array import *
from sklearn import utils
from sklearn import svm 
import matplotlib.pyplot as plt
import matplotlib.font_manager
from random import gauss
from random import seed
from random import random
from random import uniform

rng = np.random.RandomState(42)
seed(42)

# generation common data
samples = 2000
ambient = 400
std_dev = 3
typical_chance = 0.001
abnormal_chance = 0.01
lower_bound = -100
upper_bound = 800

# Generate train data
i = 0;
X_train = array('f')
while i < samples:
   X_train.append(ambient+gauss(0,std_dev));
   r = random()
   if r < typical_chance:
       X_train[i] += uniform(lower_bound,upper_bound)
   i += 1

d = {'reading':X_train}
df = pd.DataFrame(data=d)
X_train = np.array(df)
X_train.reshape(-1,1)

# Generate typical test data
i = 0;
X_typical_test = array('f')
while i < samples:
   X_typical_test.append(ambient+gauss(0,std_dev));
   r = random()
   if r < typical_chance:
       X_typical_test[i] += uniform(lower_bound,upper_bound)
   i += 1

dd = {'reading':X_typical_test}
ddf = pd.DataFrame(data=dd)
X_typical_test = np.array(ddf)
X_typical_test.reshape(-1,1)

# Generate abnormal test data
i = 0;
X_abnormal_test = array('f')
while i < samples:
   X_abnormal_test.append(ambient+gauss(0,3*std_dev));
   r = random()
   if r < abnormal_chance:
       X_abnormal_test[i] += uniform(lower_bound,upper_bound)
   i += 1

ddd = {'reading':X_abnormal_test}
dddf = pd.DataFrame(data=ddd)
X_abnormal_test = np.array(dddf)
X_abnormal_test.reshape(-1,1)

#model fit
clf = svm.OneClassSVM(nu=0.001, kernel="rbf", gamma=0.001)
clf.fit(X_train)
y_pred_train = clf.predict(X_train)
y_pred_train = y_pred_train.astype(np.float32)
y_pred_typical = clf.predict(X_typical_test)
y_pred_abnormal = clf.predict(X_abnormal_test)

# Generate noise to better see distribution of points
i = 0
while i < samples:
    y_pred_train[i] += gauss(0,0.125)
    i+=1
    
i = 0
while i < samples:
    y_pred_typical[i] += gauss(0,0.125)
    y_pred_abnormal[i] += gauss(0,0.125)
    i += 1

# plot the line, the samples, and the nearest vectors to the plane
plt.title("Trained SVM")
plt.scatter(X_train, y_pred_train,c='White',s=10, edgecolor='k')
plt.xlim(290,1210)
plt.ylim(-2,2)
plt.show()
plt.title("Trained SVM - narrowed")
plt.scatter(X_train, y_pred_train,c='White',s=10, edgecolor='k')
plt.xlim(350,450)
plt.ylim(-2,2)
plt.show()

plt.title("Typical Test SVM")
plt.scatter(X_typical_test, y_pred_typical,c='green',s=10, edgecolor='k')
plt.xlim(290,1210)
plt.ylim(-2,2)
plt.show()
plt.title("Typical Test SVM - narrowed")
plt.scatter(X_typical_test, y_pred_typical,c='green',s=10, edgecolor='k')
plt.xlim(350,450)
plt.ylim(-2,2)
plt.show()

plt.title("Abnormal Test SVM")
plt.scatter(X_abnormal_test, y_pred_abnormal,c='red',s=10, edgecolor='k')
plt.xlim(290,1210)
plt.ylim(-2,2)
plt.show()
plt.title("Abnormal Test SVM - narrowed")
plt.scatter(X_abnormal_test, y_pred_abnormal,c='red',s=10, edgecolor='k')
plt.xlim(350,450)
plt.ylim(-2,2)
plt.show()

plt.title("Composite of Train, Typical Test, Abnormal Test SVM")
plt.scatter(X_train, y_pred_train,c='White',s=10, edgecolor='k')
plt.scatter(X_typical_test, y_pred_typical,c='green',s=10, edgecolor='k')
plt.scatter(X_abnormal_test, y_pred_abnormal,c='red',s=10, edgecolor='k')
plt.xlim(290,1210)
plt.ylim(-2,2)
plt.show()
plt.title("Composite - narrowed")
plt.scatter(X_train, y_pred_train,c='White',s=10, edgecolor='k')
plt.scatter(X_typical_test, y_pred_typical,c='green',s=10, edgecolor='k')
plt.scatter(X_abnormal_test, y_pred_abnormal,c='red',s=10, edgecolor='k')
plt.xlim(350,450)
plt.ylim(-2,2)
plt.show()

错误

我希望 oneClassSVM 会将这些点标记为 1 或 -1。这是我的理解。前两个散点图(经过训练)确实将数据点绘制为 1 或 -1。另一方面,其余散点图将数据点绘制为 1 或 0,或者在某些情况下为 1、0 和 -1。我很困惑为什么会这样,因为我知道 SVM 会将点分组为 1 或 -1。

我真的希望我已经足够彻底地解释了自己。任何帮助是极大的赞赏。

标签: pythonmachine-learning

解决方案


推荐阅读