r - 绘图覆盖，所以在 R 循环结束时，都从列表的最后一个元素中拉出。我究竟做错了什么？

问题描述

我已经在这个循环上卡了一段时间（从我的提问历史可以看出），但我认为已经接近修复它了，非常感谢我在 Stack Overflow 上获得的帮助。

我注意到在我的图中，每个图都使用 data_percentage_list[[391]]，即列表中的最后一个元素。我已经尝试了很多办法来避免这种情况，但使用下面的代码时问题依然存在：

# Create graphs in list
# Step 1 builds one percentage matrix per entry of harps; step 2 writes bar
# charts of those matrices to PDF files.

# Create titles for plots
# titlenames is built directly from harps; its values name the PDF files below.
titlenames <- c(harps)

 for (i in 1:length(harps)){

# Cross-tabulate column 5 against column 3 of the i-th element of Y.
counts <- table(Y[[i]][[5]], Y[[i]][[3]])
# Convert every column of counts to percentages of that column's total and
# store the result in a variable named data_percentage_<i>.
nam <- paste("data_percentage_", i, sep ="")
assign(nam, apply(counts, 2, function(x){x*100/sum(x,na.rm=T)}))
 }

# Gather the data_percentage_<i> variables into one list, in index order.
data_percentage_list <- lapply(paste0("data_percentage_",1:length(harps)), get)

# Create pdf of score breakdown
# NOTE(review): the two loops are nested, so for every i the inner loop reopens
# "<j>.pdf" for every j and draws data_percentage_list[[i]] into it. Each file
# is therefore rewritten once per i and ends up holding the data of the last i.
for (i in 1:length(harps)){ for(j in titlenames) {

# For Hotel Name Subtitle
# Looked up by j only, which is why the subtitle matches the file name.
hotelname <- hotel_report$`Hotel (Q15 1)`[hotel_report$`Harp Number`==j]

# Plot the Data

# The file name depends only on j; the plotted data depends only on i.
pdf(file = paste0(j, ".pdf"), paper = "USr", width=8, height=7)
par(mar = c(5.1, 7, 4.1, 2.1))
# Keep barplot()'s return value in a variable named breakdown_<i>.
nam <- paste("breakdown_", i, sep ="")
assign(nam, barplot(data_percentage_list[[i]], main = "Breakdown of Property Score Distribution", sub = hotelname, 
        col = coul, las = 1, cex.names = .6, horiz = TRUE, yaxs="i", xlab = "Percentage",
        cex.axis = .8, cex.lab = .8, cex.main = .8, cex.sub = .8))
dev.off()
}}

其中 length(harps) 为 391，因此共有 391 个图，这些图在循环执行过程中会被覆盖。因此，当我打开某个 PDF 中的图并刷新时，它会变成循环最近一次迭代的数据，直到最后所有图都变成第 391 家酒店的数据——但酒店名称是正确的，因为它是根据 j 提取的。

有谁知道我需要如何更改我的代码以使每个图都对应于正确的数据？意思是，breakdown_54 应该使用 data_percentage_list[54]，并保存为该数据的 pdf，breakdown_55 应该是 data_percentage_list[55]，等等？

谢谢！

编辑：在进一步研究之后跟进。

下面的代码生成了 391 个不同的图表，但 391 个 pdf 文件中的每一个都有所有 391 个图表，而不是像他们应该的那样只有它们自己的图表。

与修复上面的代码相比，在此代码中正确拆分这些 pdf 是否更容易？

# Create graphs in list
# Step 1 builds one percentage matrix per entry of harps; step 2 writes bar
# charts of those matrices to PDF files.

# Create titles for plots
# titlenames is built directly from harps; its values name the PDF files below.
titlenames <- c(harps)

 for (i in 1:length(harps)){

# Cross-tabulate column 5 against column 3 of the i-th element of Y.
counts <- table(Y[[i]][[5]], Y[[i]][[3]])
# Convert every column of counts to percentages of that column's total and
# store the result in a variable named data_percentage_<i>.
nam <- paste("data_percentage_", i, sep ="")
assign(nam, apply(counts, 2, function(x){x*100/sum(x,na.rm=T)}))
 }

# Gather the data_percentage_<i> variables into one list, in index order.
data_percentage_list <- lapply(paste0("data_percentage_",1:length(harps)), get)

# Create pdf of score breakdown
# NOTE(review): the outer i / inner j nesting is unchanged from the first
# attempt, so every "<j>.pdf" is still reopened once per i.
for (i in 1:length(harps)){ for(j in titlenames) {

# For Hotel Name Subtitle
hotelname <- hotel_report$`Hotel (Q15 1)`[hotel_report$`Harp Number`==j]

# Plot the Data

pdf(file = paste0(j, ".pdf"), paper = "USr", width=8, height=7)
par(mar = c(5.1, 7, 4.1, 2.1))
# nam is computed from the outer loop's i, before the lapply() below runs.
nam <- paste("breakdown_", i, sep ="")
# NOTE(review): this lapply() runs while a single PDF device is open and calls
# barplot() for every element of data_percentage_list, so each file receives
# length(harps) charts. The function's own i shadows the outer loop's i, and
# assign() is called inside the anonymous function, so by default it binds nam
# in that function's frame rather than in the calling environment.
breakdown_list <- lapply(1:length(harps), function(i){
assign(nam, barplot(data_percentage_list[[i]], main = "Breakdown of Property Score Distribution", sub = hotelname, 
        col = coul, las = 1, cex.names = .6, horiz = TRUE, yaxs="i", xlab = "Percentage",
        cex.axis = .8, cex.lab = .8, cex.main = .8, cex.sub = .8))})
dev.off()
}}

再次感谢！

编辑2：试图使这更具重现性

Y 是包含 391 个数据框的列表，下面是其中一个数据框的 dput() 输出。

structure(list(`Hotel (Q15 1)` = c("HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", "HILTON, SAN PEDRO, BELIZE", 
"HILTON, SAN PEDRO, BELIZE"), `Metro Area State (Q10 1)` = c("OCONUS", 
"OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", 
"OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", 
"OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", "OCONUS", 
"OCONUS"), `Question ID` = c("Room Work Area", "Staff Knowledge", 
"Add'tl Item Working Order", "Property Maintenance", "Property Appearance", 
"Staff Knowledge", "Property Appearance", "Staff Interaction", 
"Safety/Security", "Add'tl Item Working Order", "Room Work Area", 
"Bed Quality", "Check In/Out", "Invoice Accuracy", "Staff Interaction", 
"Safety/Security", "Bed Quality", "Invoice Accuracy", "Check In/Out", 
"Safety/Security", "Invoice Accuracy", "Bed Quality", "Property Maintenance"
), `Question ID (group)` = c("Question 4 Items", "Question 4 Items", 
"Question 4 Items", "Question 4 Items", "Question 4 Items", "Question 4 Items", 
"Question 4 Items", "Question 4 Items", "Question 4 Items", "Question 4 Items", 
"Question 4 Items", "Question 4 Items", "Question 4 Items", "Question 4 Items", 
"Question 4 Items", "Question 4 Items", "Question 4 Items", "Question 4 Items", 
"Question 4 Items", "Question 4 Items", "Question 4 Items", "Question 4 Items", 
"Question 4 Items"), `Score Label` = c("7 Extremely Good", "7 Extremely Good", 
"7 Extremely Good", "7 Extremely Good", "7 Extremely Good", "6 Quite Good", 
"6 Quite Good", "6 Quite Good", "6 Quite Good", "6 Quite Good", 
"6 Quite Good", "6 Quite Good", "6 Quite Good", "7 Extremely Good", 
"7 Extremely Good", "5 Slightly Good", "7 Extremely Good", "6 Quite Good", 
"7 Extremely Good", "7 Extremely Good", "3 Slightly Poor", "5 Slightly Good", 
"6 Quite Good"), `Harp Number` = c("1111", "1111", "1111", "1111", 
"1111", "1111", "1111", "1111", "1111", "1111", "1111", "1111", 
"1111", "1111", "1111", "1111", "1111", "1111", "1111", "1111", 
"1111", "1111", "1111")), row.names = c(9380L, 9381L, 9383L, 
9384L, 9385L, 9387L, 9388L, 9389L, 9390L, 9391L, 9392L, 9393L, 
9394L, 9395L, 9396L, 9399L, 9402L, 9403L, 9404L, 9405L, 9407L, 
9408L, 9411L), class = "data.frame")

下面是 dput(harps)

dput(harps)
c("1111", "1696", "3279", "5646", "5724", "5938", "6887", "8859", 
"9368", "9508", "11569", "11644", "18661", "21418", "22460", 
"23317", "25755", "26076", "26336", "28917", "29497", "29498", 
"30465", "30619", "30629", "32784", "35578", "35588", "40390", 
"40866", "47493", "47677", "47866", "48064", "48294", "50432", 
"50667", "50773", "51857", "52125", "52146", "52383", "52432", 
"52451", "52755", "53589", "53620", "56939", "57784", "59571", 
"61276", "61283", "62329", "62666", "66058", "66553", "66741", 
"66763", "67092", "67169", "67214", "67373", "67840", "69494", 
"71343", "73906", "74550", "75285", "76253", "76335", "76361", 
"76393", "76396", "76898", "76949", "78501", "78800", "80079", 
"81035", "81620", "85043", "87026", "87219", "87304", "88683", 
"89650", "92759", "94380", "94427", "95043", "95255", "96061", 
"96677", "97269", "100135", "109591", "109743", "109971", "110414", 
"110856", "110884", "110899", "110926", "111032", "111384", "111605", 
"123136", "123411", "124380", "124753", "124848", "127565", "135185", 
"135999", "136005", "138251", "140027", "140074", "140091", "140095", 
"140159", "145523", "148284", "149639", "153676", "154790", "157239", 
"158213", "158259", "159248", "159343", "159401", "159842", "161219", 
"161725", "163154", "163653", "167172", "170199", "171936", "172095", 
"172272", "172273", "172340", "172868", "173429", "173816", "175033", 
"177012", "177150", "177361", "177383", "177692", "177892", "177965", 
"179887", "180495", "182189", "182979", "183174", "183717", "183879", 
"184076", "185191", "185341", "185675", "185961", "189276", "190279", 
"190896", "192388", "192984", "193387", "193441", "193526", "193534", 
"193605", "193613", "193614", "194274", "194794", "196133", "196546", 
"197075", "197647", "198115", "200996", "201627", "202124", "202992", 
"205802", "206405", "206880", "206990", "207423", "207483", "207723", 
"208210", "208943", "209614", "210006", "211605", "211985", "212714", 
"213707", "213803", "213842", "215961", "216533", "217963", "218029", 
"218348", "218376", "221745", "222179", "222299", "222399", "222736", 
"222882", "224539", "224624", "225339", "225346", "225368", "225553", 
"225565", "225572", "225573", "226003", "228325", "229582", "229614", 
"230871", "231228", "231402", "235196", "235538", "239409", "241353", 
"244587", "244654", "245353", "246093", "246311", "247209", "251084", 
"253732", "254388", "256996", "258464", "260958", "261655", "262754", 
"263192", "263444", "265835", "269872", "270285", "271683", "271687", 
"272664", "275922", "276312", "279909", "287731", "291167", "291988", 
"296004", "297975", "298318", "298401", "300962", "301940", "302250", 
"302702", "304896", "308049", "311490", "312027", "313227", "313603", 
"315536", "319957", "320049", "320270", "320352", "327521", "330319", 
"331054", "332070", "332426", "334213", "341876", "345820", "346263", 
"346723", "347340", "352596", "354486", "396465", "445549", "473263", 
"482701", "496665", "503123", "503365", "528259", "538396", "539834", 
"540896", "546228", "546290", "546652", "546922", "548916", "550479", 
"552466", "709416", "714793", "714861", "716337", "719021", "728913", 
"731082", "732346", "733242", "735165", "735348", "735473", "749296", 
"757777", "761782", "762104", "770251", "808540", "809896", "809951", 
"812527", "816275", "837926", "842678", "843836", "847737", "857277", 
"864044", "864495", "865468", "865951", "866108", "866502", "866547", 
"867803", "867809", "868374", "868420", "868593", "868793", "869746", 
"869748", "870953", "872490", "872579", "875200", "875288", "878016", 
"878858", "879328", "879640", "882643", "882781", "883894", "886067", 
"886876", "888522", "888560", "888820", "889693", "890261", "890264", 
"891171", "894931", "896794", "896840", "899485", "901218", "903465", 
"904381", "912517", "913354", "918968", "921083")

标签：r、list、loops、plot、overwrite

解决方案

请考虑以下适用于 R（乃至一般编程）的通用技巧：

变量：避免创建过多变量，而应直接使用现有对象。这样可以让环境中的变量更易于维护。冗余变量的一些示例包括：

titlenames <- c(harps)
nam <- paste("data_percentage_", i, sep ="")
data_percentage_list <- lapply(paste0("data_percentage_",1:length(harps)), get)

名称：为对象使用信息量更大的名称，因为像 Y 这样的名称日后无法向代码读者或您自己传达任何信息。它似乎是一个列表，包含较大数据框 hotel_report 的各个子集。信息量更大的名称（例如 hotel_reports_df_list）可以快速说明其内容和类型（即由数据框组成的列表）。
缩进：始终对 for 循环内的代码进行缩进（在 RStudio 中可用快捷键 Ctrl/Cmd + I 自动缩进），在 pdf、with 等上下文管理器中也是如此。这可以增强可读性和可维护性。

assign/get：避免使用 assign 和 get，在 R 中通常不推荐使用它们。相反，应直接将对象保存为列表中的元素。第一个循环可以改写如下，从而无需把各个子项分配为单独的变量：

# Build one percentage matrix per entry of harps, kept directly as list items
# instead of separately named variables.
data_pct_matrix_list <- lapply(seq_along(harps), function(idx) {
    harp_df <- Y[[idx]]

    # Cross-tabulate column 5 against column 3 of this element of Y.
    score_counts <- table(harp_df[[5]], harp_df[[3]])

    # Rescale every column of counts to percentages of that column's total.
    apply(score_counts, 2, function(col_counts) {
        col_counts * 100 / sum(col_counts, na.rm = TRUE)
    })
})

最后，包裹在 barplot 外层的 assign 也可以重构：

# Draw one horizontal bar chart per percentage matrix and collect the values
# returned by barplot() in a list.
#
# Iterates over data_pct_matrix_list, the list built by the lapply() shown
# earlier. `hotelname` and `coul` are not defined in this snippet and must
# already exist in the calling environment.
plot_list <- lapply(data_pct_matrix_list, function(mat) {
   barplot(mat, main = "Breakdown of Property Score Distribution", sub = hotelname,
           col = coul, las = 1, cex.names = .6, horiz = TRUE, yaxs = "i", xlab = "Percentage",
           cex.axis = .8, cex.lab = .8, cex.main = .8, cex.sub = .8)
})

for 循环：尽可能避免多重或嵌套循环。在 R 中，lapply 是一个隐式循环。您遇到的“391 个 PDF 中每个都包含 391 个图”的问题，很可能是由于在 for 循环中嵌套了 lapply。请考虑以下步骤：
1. 首先，考虑您对一个数据框对象的处理。甚至将其概括为一个单独的函数。
2. 然后，想想究竟有哪些变化可以迭代。
R 的 apply 系列不仅包括 apply 和 lapply，还包括例如 mapply（可以按元素并行迭代，从而展平嵌套迭代）和 by（tapply 的面向对象包装器，可以按因子列对数据框取子集并对每个子集执行操作）。

由于无法看到样本数据，请考虑以下方法，它们需要针对实际数据进行测试。下面假设 Y 是按 Harp Number 从 hotel_report 数据框拆分出的子集列表。

`mapply`/`Map` 方法

在等长对象 data_pct_matrix_list 和 harps 上按元素迭代。

# Build one percentage matrix per entry of harps: rows are score labels,
# columns are question IDs.
data_pct_matrix_list <- lapply(seq_along(harps), function(idx) {
    harp_df <- Y[[idx]]

    # Count how often each score label occurs for each question.
    score_counts <- table(harp_df$`Score Label`, harp_df$`Question ID`)

    # Rescale every column of counts to percentages of that column's total.
    apply(score_counts, 2, function(col_counts) {
        col_counts * 100 / sum(col_counts, na.rm = TRUE)
    })
})

# Write the score-breakdown bar chart for one property to "<harp>.pdf".
#
# Args:
#   data: percentage matrix handed to barplot().
#   harp: Harp Number of the property; it names the PDF file and selects the
#         subtitle from hotel_report.
#
# Returns:
#   The value returned by barplot().
#
# Reads the objects `hotel_report` and `coul`, which are defined outside this
# function.
build_pdf <- function(data, harp) {
    # For Hotel Name Subtitle
    hotelname <- hotel_report$`Hotel (Q15 1)`[hotel_report$`Harp Number` == harp]

    # Plot the Data
    pdf(file = paste0(harp, ".pdf"), paper = "USr", width = 8, height = 7)
    # Close exactly this device on every exit path, so a failing barplot()
    # does not leave the PDF device open for the next call.
    pdf_device <- dev.cur()
    on.exit(dev.off(pdf_device), add = TRUE)

    # par() is called after pdf() so the margins apply to the new device.
    par(mar = c(5.1, 7, 4.1, 2.1))

    hotel_plot <- barplot(data, main = "Breakdown of Property Score Distribution", sub = hotelname,
                          col = coul, las = 1, cex.names = .6, horiz = TRUE, yaxs = "i", xlab = "Percentage",
                          cex.axis = .8, cex.lab = .8, cex.main = .8, cex.sub = .8)

    hotel_plot
}

# Pair each percentage matrix with the harp number at the same position and
# call build_pdf() once per pair.
plot_list <- Map(build_pdf, data_pct_matrix_list, harps)

# EQUIVALENTLY:
# Alternative spelling of the Map() call above; run one or the other, since
# each call invokes build_pdf() for every pair.
plot_list <- mapply(build_pdf, data_pct_matrix_list, harps, SIMPLIFY=FALSE)

`by`方法

按唯一的 Harp Number 对 hotel_report 数据框取子集，并在每个子集上依次运行，以构建 pct_matrix 和 hotel_plot。这种方法把矩阵构建和绘图两个步骤合并在一起。

# Build the score-percentage matrix for one property and write its bar chart
# to "<harp>.pdf".
#
# Args:
#   sub_df: data frame with the columns `Score Label`, `Question ID`,
#           `Hotel (Q15 1)` and `Harp Number`. The hotel name and harp number
#           are taken from its first row.
#
# Returns:
#   The value returned by barplot().
#
# Reads the object `coul`, which is defined outside this function.
build_pdf <- function(sub_df) {
    # Matrix build: count score labels per question, then rescale every column
    # to percentages of that column's total.
    counts <- table(sub_df$`Score Label`, sub_df$`Question ID`)
    pct_matrix <- apply(counts, 2, function(x) { x * 100 / sum(x, na.rm = TRUE) })

    # For Hotel Name Subtitle
    hotelname <- sub_df$`Hotel (Q15 1)`[1]
    harp <- sub_df$`Harp Number`[1]

    # Plot the Data
    pdf(file = paste0(harp, ".pdf"), paper = "USr", width = 8, height = 7)
    # Close exactly this device on every exit path, so a failing barplot()
    # does not leave the PDF device open for the next subset.
    pdf_device <- dev.cur()
    on.exit(dev.off(pdf_device), add = TRUE)

    # par() is called after pdf() so the margins apply to the new device.
    par(mar = c(5.1, 7, 4.1, 2.1))

    hotel_plot <- barplot(pct_matrix, main = "Breakdown of Property Score Distribution", sub = hotelname,
                          col = coul, las = 1, cex.names = .6, horiz = TRUE, yaxs = "i", xlab = "Percentage",
                          cex.axis = .8, cex.lab = .8, cex.main = .8, cex.sub = .8)

    hotel_plot
}

# Split hotel_report by Harp Number and call build_pdf() once per subset.
plot_list <- by(hotel_report, hotel_report$`Harp Number`, build_pdf)

# NEAR EQUIVALENT
# Same split-and-apply using split() + lapply(); run one call or the other,
# since each call invokes build_pdf() for every subset.
plot_list <- lapply(split(hotel_report, hotel_report$`Harp Number`), build_pdf)

r - 绘图覆盖，所以在 R 循环结束时，都从列表的最后一个元素中拉出。我究竟做错了什么？

问题描述

解决方案

mapply/Map接近

by方法

推荐阅读

`mapply`/`Map`接近

`by`方法