r - Delete incomplete cases in nested dataframe using map function from purrr
问题描述
I would like to delete incomplete cases from each data frame of a nested tibble. I tried to use the map function (purrr package), but I received the following error message: "Error in parent.env(x) : argument is not an environment". I do not understand what the problem is.
Here is a reproducible example.
library(tidyverse)
# Download the gapminder five-year data set and keep the six columns used below.
gapminder_orig <- read.csv("https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/gh-pages/_episodes_rmd/data/gapminder-FiveYearData.csv")
gapminder_orig <- gapminder_orig %>%
dplyr::select(continent, country, year, pop, lifeExp, gdpPercap)
# Inject missing values into columns 4:6 (pop, lifeExp, gdpPercap): indexing a
# vector with a logical index that contains NA yields NA at those positions,
# and NA is drawn with probability 0.2 for every element.
# NOTE(review): no set.seed() is called, so the NA pattern differs between runs.
data_with_NA<-map_df(gapminder_orig[,4:6], function(x) {x[sample(c(TRUE, NA), prob = c(0.8, 0.2), size = length(x), replace = TRUE)]})
# Overwrite the three measurement columns with their NA-injected versions.
gapminder_orig_with_NA<-gapminder_orig %>%
mutate(pop=data_with_NA$pop, lifeExp=data_with_NA$lifeExp, gdpPercap=data_with_NA$gdpPercap)
# Add a random dummy column, nest the rows of each continent, then attach one
# Type and one Sector label per nested row. The five-element label vectors
# assume the data contains exactly five continents -- TODO confirm for this CSV.
gapminder_nested <- gapminder_orig_with_NA %>%
mutate(dummy_var= sample(1:3, nrow(gapminder_orig_with_NA), replace=TRUE)) %>%
group_by(continent) %>%
nest() %>%
add_column(Type=c("Full", "Full", "Subset","Subset","Subset")) %>%
add_column(Sector=c("Agriculture", "Banking", "Agriculture", "Banking", "Agriculture"))
# Print the nested tibble.
gapminder_nested
# Drop every row of `x` that contains at least one missing value.
#
# x: a data frame or tibble.
# Returns `x` restricted to its complete rows, with all columns kept.
remove_NA <- function(x) {
  # drop = FALSE stops a one-column data.frame from collapsing to a vector.
  x[complete.cases(x), , drop = FALSE]
}
# Drop every row of `x` that contains at least one missing value.
#
# x: a data frame or tibble.
# z: not used by the body; accepted only so the function can be called with
#    two arguments (for example from map2()).
# Returns `x` restricted to its complete rows, with all columns kept.
remove_NAz <- function(x, z) {
  # drop = FALSE stops a one-column data.frame from collapsing to a vector.
  x[complete.cases(x), , drop = FALSE]
}
# Add a list-column `data2` holding each nested data frame without its
# incomplete rows. The commented-out lines are the attempts reported as failing.
test<-gapminder_nested %>%
# In the next attempt .f is a call to filter(), not a function or formula.
#mutate(data2 = map(.x=data, .f=filter(complete.cases(.x)))) #Does not work
#mutate(data2 = map(.x=data, .f=na.omit)) #Does not work
#mutate(data2 = map(data, ~ map_dfc(., na.omit))) #Does not work
#mutate(data2 = map(data, function(.x) remove_NA(.x))) #Does not work
# map2() also iterates over Type, which remove_NAz() accepts but does not use.
mutate(data2= map2(data, Type, function(.x, .z) remove_NAz(.x, .z))) # Works, but not elegant
Any idea what is going wrong with the calls to the map function? Why does it work with map2?
Thanks!
解决方案
As far as I can tell, at least your second approach works fine. To make the first approach work as well, use .f = ~ filter(.x, complete.cases(.x)).
Both approaches give me the same result as your final approach using map2:
library(dplyr)
library(purrr)
library(tidyr)
library(tibble)
# Fix the RNG seed so the injected NA pattern, and therefore the row counts
# printed further down, are the same on every run.
set.seed(42)
# Use the data set shipped with the gapminder package and keep six columns.
gapminder_orig <- gapminder::gapminder
gapminder_orig <- gapminder_orig %>%
dplyr::select(continent, country, year, pop, lifeExp, gdpPercap)
# Inject missing values into columns 4:6 (pop, lifeExp, gdpPercap): indexing a
# vector with a logical index that contains NA yields NA at those positions,
# and NA is drawn with probability 0.2 for every element.
data_with_NA<-map_df(gapminder_orig[,4:6], function(x) {x[sample(c(TRUE, NA), prob = c(0.8, 0.2), size = length(x), replace = TRUE)]})
# Overwrite the three measurement columns with their NA-injected versions.
gapminder_orig_with_NA<-gapminder_orig %>%
mutate(pop=data_with_NA$pop, lifeExp=data_with_NA$lifeExp, gdpPercap=data_with_NA$gdpPercap)
# Add a random dummy column, nest the rows of each continent, then attach one
# Type and one Sector label to each of the five nested rows.
gapminder_nested <- gapminder_orig_with_NA %>%
mutate(dummy_var= sample(1:3, nrow(gapminder_orig_with_NA), replace=TRUE)) %>%
group_by(continent) %>%
nest() %>%
add_column(Type=c("Full", "Full", "Subset","Subset","Subset")) %>%
add_column(Sector=c("Agriculture", "Banking", "Agriculture", "Banking", "Agriculture"))
# Drop every row of `x` that contains at least one missing value.
#
# x: a data frame or tibble.
# z: not used by the body; accepted only so the function can be called with
#    two arguments (for example from map2()).
# Returns `x` restricted to its complete rows, with all columns kept.
remove_NAz <- function(x, z) {
  # drop = FALSE stops a one-column data.frame from collapsing to a vector.
  x[complete.cases(x), , drop = FALSE]
}
# Approach 1: a formula lambda that keeps only the complete rows of each
# nested data frame.
gapminder_nested %>%
mutate(data2 = map(data, ~ filter(.x, complete.cases(.x))))
#> # A tibble: 5 x 5
#> # Groups: continent [5]
#> continent data Type Sector data2
#> <fct> <list> <chr> <chr> <list>
#> 1 Asia <tibble [396 x 6]> Full Agriculture <tibble [185 x 6]>
#> 2 Europe <tibble [360 x 6]> Full Banking <tibble [195 x 6]>
#> 3 Africa <tibble [624 x 6]> Subset Agriculture <tibble [311 x 6]>
#> 4 Americas <tibble [300 x 6]> Subset Banking <tibble [150 x 6]>
#> 5 Oceania <tibble [24 x 6]> Subset Agriculture <tibble [10 x 6]>
# Approach 2: pass na.omit itself as the function mapped over the list-column.
gapminder_nested %>%
mutate(data2 = map(.x=data, .f=na.omit))
#> # A tibble: 5 x 5
#> # Groups: continent [5]
#> continent data Type Sector data2
#> <fct> <list> <chr> <chr> <list>
#> 1 Asia <tibble [396 x 6]> Full Agriculture <tibble [185 x 6]>
#> 2 Europe <tibble [360 x 6]> Full Banking <tibble [195 x 6]>
#> 3 Africa <tibble [624 x 6]> Subset Agriculture <tibble [311 x 6]>
#> 4 Americas <tibble [300 x 6]> Subset Banking <tibble [150 x 6]>
#> 5 Oceania <tibble [24 x 6]> Subset Agriculture <tibble [10 x 6]>
# Approach 3: the map2() version from the question; Type is passed along as
# the second argument, which remove_NAz() accepts but does not use.
gapminder_nested %>%
mutate(data2= map2(data, Type, function(.x, .z) remove_NAz(.x, .z)))
#> # A tibble: 5 x 5
#> # Groups: continent [5]
#> continent data Type Sector data2
#> <fct> <list> <chr> <chr> <list>
#> 1 Asia <tibble [396 x 6]> Full Agriculture <tibble [185 x 6]>
#> 2 Europe <tibble [360 x 6]> Full Banking <tibble [195 x 6]>
#> 3 Africa <tibble [624 x 6]> Subset Agriculture <tibble [311 x 6]>
#> 4 Americas <tibble [300 x 6]> Subset Banking <tibble [150 x 6]>
#> 5 Oceania <tibble [24 x 6]> Subset Agriculture <tibble [10 x 6]>
推荐阅读
- biopython - Biopython的PDB模块可以处理CONECT记录吗
- .net - Dotnet 构建失败,引用了其他 git 分支中不存在的旧文件
- powershell - Get-ChildItem 注册表项,仅提取键名
- r - 是否可以在 Shiny 中显示使用 system(wait=T) 运行的外部程序的日志?
- python - 具有多个 cmap 的 matplotlib 热图
- docker - 为什么从容器内部运行时不显示 vscode?
- azure - 使用 ARM 模板问题的 Azure 仪表板部署
- python - Pandas 使用原始日期时间索引从每日重新采样到每月
- r - R - 从具有不同 # 行的另一个数据集中添加变量
- video - 使用ffmpeg下载多个文件,每个文件保留一个流(根据默认流选择),然后将它们混合成单个文件?