首页 > 解决方案 > geom_tile 填写混淆矩阵对应的值

问题描述

我试图让混淆矩阵的颜色与每个矩阵中间的百分比值相对应。

我尝试将 geom_tile 部分填充调整为频率或百分比的各种选项,但没有运气。

valid_actualFunc <- as.factor(c(conf$ObsFunc))
valid_predFunc   <- as.factor(c(conf$PredFunc))
cfmFunc <- confusionMatrix(valid_actualFunc, valid_predFunc)


ggplotConfusionMatrix <- function(m){
  mytitle <- paste("Accuracy", percent_format()(m$overall[1]),
               "Kappa", percent_format()(m$overall[2]))
  data_c <-  mutate(group_by(as.data.frame(m$table), Prediction ), 
percentage=percent(Freq/sum(Freq)))

  p <-
ggplot(data = data_c,
       aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = Freq/sum(Freq)), colour = "white") +
scale_fill_gradient(low = "white", high = "red", na.value="white") +
geom_text(aes(x = Reference, y = Prediction, label = percentage)) +
theme(axis.text.x=element_text(angle = -90, hjust = 0), 
axis.ticks=element_blank(), legend.position="none") +
ggtitle(mytitle)+
scale_y_discrete(limits = rev(levels(as.factor(valid_predFunc))))
  return(p)
}


conf2Func=ggplotConfusionMatrix(cfmFunc)
conf2Func

目前填充不等于中间的值,即 89% 的瓷砖比 70% 的瓷砖轻

根据评论,回报是

dput(head(cfmFunc))

list(positive = NULL, table = structure(c(2331L, 102L, 262L, 
52L, 290L, 1986L, 178L, 89L, 495L, 74L, 2966L, 52L, 189L, 58L, 
92L, 800L), .Dim = c(4L, 4L), .Dimnames = list(Prediction = c("Algae", 
"Hard Coral", "Other", "Other Inv"), Reference = c("Algae", "Hard Coral", 
"Other", "Other Inv")), class = "table"), overall = c(Accuracy = 
0.807008785942492, 
Kappa = 0.730790156424558, AccuracyLower = 0.799141307917932, 
AccuracyUpper = 0.814697342402988, AccuracyNull = 0.358126996805112, 
AccuracyPValue = 0, McnemarPValue = 6.95780670112837e-62), byClass = 
structure(c(0.848562067710229, 
0.780967361384192, 0.826874825759688, 0.702370500438982, 
0.866006328243225, 
0.968687274187073, 0.917249961113703, 0.978258420637603, 
0.705295007564297, 
0.894594594594595, 0.847913093196112, 0.805639476334341, 
0.938012218745343, 
0.928553104155977, 0.904725375882172, 0.962429347223761, 
0.705295007564297, 
0.894594594594595, 0.847913093196112, 0.80563947633434, 0.848562067710229, 
0.780967361384192, 0.826874825759688, 0.702370500438982, 
0.770323859881031, 
0.833928196514802, 0.837261820748059, 0.75046904315197, 0.274261182108626, 
0.253893769968051, 0.358126996805112, 0.113718051118211, 
0.232727635782748, 
0.198282747603834, 0.296126198083067, 0.0798722044728434, 
0.329972044728434, 
0.221645367412141, 0.349241214057508, 0.0991413738019169, 
0.857284197976727, 
0.874827317785633, 0.872062393436696, 0.840314460538292), .Dim = c(4L, 
11L), .Dimnames = list(c("Class: Algae", "Class: Hard Coral", 
"Class: Other", "Class: Other Inv"), c("Sensitivity", "Specificity", 
"Pos Pred Value", "Neg Pred Value", "Precision", "Recall", "F1", 
"Prevalence", "Detection Rate", "Detection Prevalence", "Balanced 
Accuracy"
))), mode = "sens_spec", dots = list())

标签: rggplot2confusion-matrix

解决方案


如果您检查要绘制的数据集的结构str(data_c),您将看到这percentage是一个字符向量,需要将其转换为数字以用作填充渐变的连续输入。

data_c$percentage.numeric <- as.numeric(gsub("%", "", data_c$percentage))

您可以使用percentage.numericforaes fillpercentagefor aes label

ggplot(data = data_c,
       aes(x = Reference, y = Prediction)) +
  geom_tile(aes(fill = percentage.numeric), colour = "white") +
  scale_fill_gradient(low = "white", high = "red", na.value="white") +
  geom_text(aes(x = Reference, y = Prediction, label = percentage)) +
  theme(axis.text.x=element_text(angle = -90, hjust = 0), 
        axis.ticks=element_blank(), legend.position="none") +
  ggtitle(mytitle)

注意scale_y_discrete(limits = rev(levels(as.factor(valid_predFunc))))在您的示例中产生错误

Error in as.factor(valid_predFunc) : object 'valid_predFunc' not found

在此处输入图像描述


推荐阅读