首页 > 解决方案 > 用数值和 NA 折叠行的更好方法


我想把同一 ID 下含有数值和 NA 的多行折叠(合并)成一行,但脚本运行时会出现警告。我想知道有没有更好的方法,可以在不产生警告的情况下做到这一点——另外,当前的方法用在更大的数据集上时也要运行相当长的时间。

# Example data (dput() output), bound to `abc` because the snippets below
# refer to it by that name. One row per observation; each ID can span several
# rows, and every cohort_* column holds at most one non-missing value per ID.
abc <- structure(
  list(
    ID        = c(12345, 12345, 12345, 23456, 23456, 34567, 34567, 34567, 45678),
    cohort_0  = c(10.1, NA, NA, 12, NA, 15.5, NA, NA, NA),
    cohort_2  = c(NA, 10.1, NA, NA, NA, NA, NA, NA, NA),
    # Entirely missing column; NA_real_ keeps it numeric rather than logical.
    cohort_7  = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
                  NA_real_, NA_real_, NA_real_, NA_real_),
    cohort_9  = c(NA, NA, NA, NA, 12, NA, NA, NA, NA),
    cohort_11 = c(NA, NA, NA, NA, NA, NA, NA, 15.5, NA)
  ),
  row.names = c(NA, -9L),
  class = c("tbl_df", "tbl", "data.frame")
)


# Collapse all rows of each ID into a single row by taking, per column, the
# maximum of the values converted to character.
# NOTE: every ID/column combination that contains only NA leaves max() with
# nothing to compare, which is what raises the
# "no non-missing arguments, returning NA" warnings shown below.
# NOTE(review): because of as.character() the maximum is taken on strings, so
# the summarised columns come back as character - confirm this is intended.
# The pipeline originally ended in a dangling `%>%`, which left the statement
# incomplete; it has been removed.
abc2 <- abc %>%
  group_by(ID) %>%
  summarize_all(~ max(as.character(.), na.rm = TRUE))


> warnings()
Warning messages:
1: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
2: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
3: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
4: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
5: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
6: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
7: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
8: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
9: In max(as.character(.), na.rm = TRUE) : no non-missing arguments, returning NA
10: In max(as.character(.), na.rm = TRUE) :
  no non-missing arguments, returning NA
11: In max(as.character(.), na.rm = TRUE) :
  no non-missing arguments, returning NA
12: In max(as.character(.), na.rm = TRUE) :
  no non-missing arguments, returning NA
13: In max(as.character(.), na.rm = TRUE) :
  no non-missing arguments, returning NA
14: In max(as.character(.), na.rm = TRUE) :
  no non-missing arguments, returning NA



# data.table attempt: within each ID, strip the NAs from every column with
# na.omit(). As the error below reports, this fails because the per-column
# results of one group can have different lengths (e.g. zero or 2 items for a
# group of 3 rows), while each column of a group's result must have length 1
# or match the other columns.
var2 <- setDT(var)[, lapply(.SD, na.omit), by = ID]
Error in `[.data.table`(setDT(var), , lapply(.SD, na.omit), by = ID) : 
  Supplied 2 items for column 2 of group 6039 which has 3 rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.
In addition: Warning message:
In `[.data.table`(setDT(var), , lapply(.SD, na.omit), by = ID) :
  Item 1 of j's result for group 18 is zero length. This will be filled with 2 NAs to match the longest column in this result. Later groups may have a similar problem but only the first is reported to save filling the warning buffer.

标签: r, data.table, tidyr



# Distinct non-missing values found in the cohort columns (abc[-1] drops the
# first column, ID); sort() discards NA by default.
x <- sort(unique(unlist(abc[-1])))
# One row per distinct value, one column per cohort: a cell keeps the value
# when that cohort contains it and is NA otherwise.
# vapply() is used instead of sapply() so the element type and per-column
# length of the result are fixed rather than depending on the data, and
# replace() always returns a double vector, even for an all-NA cohort.
vapply(abc[-1], function(y) replace(x, !(x %in% y), NA), numeric(length(x)))
#     cohort_0 cohort_2 cohort_7 cohort_9 cohort_11
#[1,]     10.1     10.1       NA       NA        NA
#[2,]     12.0       NA       NA       12        NA
#[3,]     15.5       NA       NA       NA      15.5
