首页 > 解决方案 > 为什么 adabag 中的预测误差是离散的?

问题描述

我有一个包含 55 个观察值的数据表,其中有 5 个变量(F、H、R、T、U)和 1 个分类变量(“Group”),该变量有两个类别。

我将数据随机划分为训练集(70%)和测试集(30%),然后运行 AdaBoost 并检查其效果。我想得到 100 次随机划分下 AdaBoost 误差的分布。但是该分布是离散的,只出现五个不同的值:0、0.0588235294117647、0.117647058823529、0.176470588235294 和 0.235294117647059。这些值不随 mfinal 参数而改变。我原以为应该有更多不同的取值!这是为什么?

我使用以下代码:


# Estimate the distribution of the AdaBoost test error over repeated random
# 70/30 train/test splits of `df`. Requires boosting() and its predict()
# method from the adabag package to be attached.
#
# Why the error distribution is discrete: the test error is
# (number of misclassified test rows) / (number of test rows). With 55 rows,
# floor(55 * 0.7) = 38 go to training and 17 to testing, so every error is a
# multiple of 1/17 (0, 0.0588, 0.1176, ...), regardless of mfinal.
n_splits <- 100L  # number of random train/test splits
n_trees <- 50L    # boosting iterations (mfinal) per model

# sample() truncates a fractional size; make that explicit.
n_train <- floor(nrow(df) * 0.7)

# Preallocate the containers instead of creating numbered global variables
# (ada1, pred1, ...) with assign()/get().
models <- vector("list", n_splits)
preds <- vector("list", n_splits)
predictions <- vector("list", n_splits)

for (i in seq_len(n_splits)) {
  # Rows drawn here are used for training; all remaining rows are the test set.
  train.ind <- sample(nrow(df), n_train)
  train_df <- df[train.ind, ]
  test_df <- df[-train.ind, ]

  models[[i]] <- boosting(
    Group ~ F + H + R + T + U,
    data = train_df,
    mfinal = n_trees,
    boos = FALSE,
    coeflearn = "Breiman"
  )

  # Predict with the model fitted in THIS iteration. The original code passed
  # an undefined object `ada` here instead of the freshly fitted model.
  preds[[i]] <- predict(models[[i]], newdata = test_df)
  predictions[[i]] <- preds[[i]]$error
}

# Histogram of the test error in percent; the title reports the settings that
# were actually used rather than a hard-coded tree count.
hist(
  100 * unlist(predictions),
  breaks = 10,
  main = sprintf(
    "Error probability [%%] ntrees=%d. %d sampling operations",
    n_trees, n_splits
  ),
  xlab = "AdaBoost error"
)

# Print `df` as an R expression so the data set can be recreated elsewhere.
# NOTE(review): the structure(...) expression that follows appears to be the
# pasted console output of this call, not separate code -- confirm.
dput(df)
structure(list(Group = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Canines", "Sled"), class = "factor"), F = c(0.263150566678734, 
0.260347316635598, 0.26437277258488, 0.265710057607949, 0.254866055219663, 
0.263294264681227, 0.261901194801303, 0.257318268395066, 0.26420207103455, 
0.252093225560912, 0.255473253732324, 0.259067858940115, 0.259528043446917, 
0.267331491048901, 0.260246447333382, 0.26035486437815, 0.254553215708594, 
0.274074579975413, 0.262896904742862, 0.260504330262876, 0.258329960879536, 
0.262664861154909, 0.256148832094211, 0.258509128895957, 0.256292083925698, 
0.262358651734143, 0.254578103664353, 0.255386025800537, 0.264120912009577, 
0.275232714712253, 0.265375720277527, 0.267601768121804, 0.262932226832642, 
0.263633189245163, 0.262826186070212, 0.261058637786334, 0.262979366135887, 
0.259232168979912, 0.252933156025384, 0.263963451214447, 0.258511197058683, 
0.261957295373665, 0.253412282699461, 0.260748166588172, 0.263136039863289, 
0.255317062006506, 0.258822015633545, 0.252757763183064, 0.260840486010478, 
0.258620689655172, 0.263738813871524, 0.26241134751773, 0.26405425581719, 
0.263685152057245, 0.262062787572784), H = c(0.242711147002311, 
0.243850477245014, 0.245132979060713, 0.241794831140003, 0.235370262206577, 
0.241392449436832, 0.236787894677703, 0.240434935369935, 0.234076675284456, 
0.236978505926275, 0.23489414817613, 0.236461115627298, 0.241377100655228, 
0.240778565421122, 0.238954656595734, 0.237237027626932, 0.23562891291975, 
0.228247507171151, 0.235543469567304, 0.238348073568565, 0.237639956832591, 
0.237993655975811, 0.23053394888479, 0.237553985998722, 0.238716430501961, 
0.241044553515742, 0.23579805839771, 0.244646715997643, 0.245211405561299, 
0.248463204730402, 0.237910443860818, 0.23772859908127, 0.242517289073306, 
0.230376515634971, 0.239386381312522, 0.242971498213445, 0.248246377553633, 
0.245227816034538, 0.237968589560153, 0.235998092571798, 0.235639593181493, 
0.240320284697509, 0.239383587641388, 0.237939850635807, 0.240409493084614, 
0.239705089012767, 0.235291279312896, 0.237725562711216, 0.251017166425148, 
0.244410329082034, 0.247581475626206, 0.244082639531298, 0.248022977743474, 
0.246127343801762, 0.246345535241663), R = c(0.23238005068085, 
0.233913128793082, 0.232906768805408, 0.234580624702711, 0.23729616240706, 
0.232552468336102, 0.23566425708828, 0.233370934038501, 0.23413197660754, 
0.241255572873247, 0.240609653949119, 0.233790113420818, 0.239086204963073, 
0.233644719452121, 0.23849468613068, 0.236846146329206, 0.239755264655663, 
0.225925420024587, 0.239355887920232, 0.237429996633718, 0.23819641170916, 
0.232039177131833, 0.223832380603256, 0.235838907338977, 0.236669843303285, 
0.234916072348618, 0.238304558463179, 0.235904655883701, 0.232124394623714, 
0.222879222527955, 0.233232723139038, 0.233871666714818, 0.235947441217151, 
0.242585880964708, 0.234693056561268, 0.233941777691605, 0.229366135886539, 
0.23539800906269, 0.239803390172875, 0.236505714593364, 0.24647853698133, 
0.235569395017794, 0.242526379716086, 0.236207360559779, 0.234180854122081, 
0.240408036487878, 0.239601762794737, 0.245058343429191, 0.234449894103222, 
0.237875925051173, 0.230698942666106, 0.233475177304965, 0.231384358432554, 
0.233114688928642, 0.230655428424067), T = c(0.261758235638105, 
0.261889077326307, 0.257587479549, 0.257914486549337, 0.272467520166701, 
0.262760817545838, 0.265646653432713, 0.268875862196498, 0.267589277073454, 
0.269672695639567, 0.269022944142428, 0.270680912011768, 0.260008650934782, 
0.258245224077857, 0.262304209940204, 0.265561961665713, 0.270062606715993, 
0.271752492828849, 0.262203737769602, 0.263717599534841, 0.265833670578713, 
0.267302305737446, 0.289484838417743, 0.268097977766344, 0.268321642269056, 
0.261680722401497, 0.271319279474757, 0.264062602318119, 0.258543287805409, 
0.253424858029389, 0.263481112722616, 0.260797966082108, 0.258603042876902, 
0.263404414155158, 0.263094376055998, 0.262028086308617, 0.259408120423941, 
0.26014200592286, 0.269294864241588, 0.263532741620391, 0.259370672778494, 
0.262153024911032, 0.264677749943065, 0.265104622216242, 0.262273612930016, 
0.264569812492848, 0.266284942258822, 0.264458330676529, 0.253692453461153, 
0.25909305621162, 0.257980767836164, 0.260030835646007, 0.256538408006782, 
0.25707281521235, 0.260936248761486), U = c(0.276642254462421, 
0.275750907536407, 0.274138521440258, 0.279385339041277, 0.283770344294126, 
0.273124933319108, 0.276770665567999, 0.272796198013943, 0.273326789343435, 
0.278824893979485, 0.282917535762971, 0.269035729493284, 0.276381346021371, 
0.275681845488406, 0.280473043309851, 0.274957072857482, 0.279453614114969, 
0.265400901516186, 0.284438401450319, 0.275270067631668, 0.277080803992985, 
0.268341093323935, 0.26334299428362, 0.27494270078114, 0.277070411973316, 
0.276364671746617, 0.277622940087166, 0.275489489882784, 0.275412200032649, 
0.267636555236813, 0.275475938484053, 0.27914367434201, 0.281161825726141, 
0.287341513046201, 0.274277898463271, 0.272041104617345, 0.268317034458041, 
0.277054269097656, 0.276448903327891, 0.282483963758864, 0.288513266166897, 
0.280409252669039, 0.283610415243301, 0.27874587902846, 0.274619094771137, 
0.275604453090517, 0.286100299160421, 0.288513039597016, 0.270078586556683, 
0.280480764184118, 0.274123602187187, 0.277940178846747, 0.273784368554907, 
0.282369310276287, 0.277372857201026)), na.action = structure(c(`2` = 2L, 
`4` = 4L, `19` = 18L, `24` = 20L, `28` = 24L, `29` = 25L, `30` = 26L, 
`32` = 28L, `33` = 29L, `42` = 38L, `54` = 46L, `69` = 54L, `74` = 58L, 
`77` = 59L, `79` = 60L, `80` = 61L, `83` = 62L), class = "omit"), row.names = c(5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 20L, 
25L, 26L, 27L, 31L, 41L, 44L, 46L, 47L, 48L, 50L, 51L, 52L, 55L, 
57L, 64L, 65L, 66L, 67L, 68L, 70L, 71L, 72L, 85L, 86L, 87L, 88L, 
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 
101L, 102L, 103L), class = "data.frame")


在此处输入图像描述

标签: r prediction adaboost boosting

解决方案


推荐阅读