首页 > 解决方案 > 在r中以tidyverse方式从列表列到数据框

问题描述

问题: 我想把一个由命名列表组成的列(列表列)转换为数据框的多个列。目前每个列表只有 9 个项目,但在其他情况下列表中的变量可能多达 120 个,逐个提取就会成为问题。那么,有没有一种高效的方法可以提取列表中的所有变量,而无需逐一调用?

下面是 dput(foo) 的输出:

# Reproducible example data: a 3-row tibble with a character column
# (`column_a`) and a list column (`column_b`). Each element of `column_b`
# is a named list of 9 fields describing one place.
foo <- structure(
  list(
    column_a = c("a", "b", "c"),
    column_b = list(
      list(
        country_code = "US",
        url = "https://api.twitter.com/1.1/geo/id/c3f37afa9efcf94b.json",
        country = "United States",
        place_type = "city",
        bounding_box = list(
          type = "Polygon",
          coordinates = structure(
            c(
              -97.928935, -97.928935, -97.580513, -97.580513,
              30.127892, 30.518799, 30.518799, 30.127892
            ),
            .Dim = c(1L, 4L, 2L)
          )
        ),
        full_name = "Austin, TX",
        attributes = structure(list(), .Names = character(0)),
        id = "c3f37afa9efcf94b",
        name = "Austin"
      ),
      list(
        country_code = "UG",
        url = "https://api.twitter.com/1.1/geo/id/0092409a629e836c.json",
        country = "Uganda",
        place_type = "admin",
        bounding_box = list(
          type = "Polygon",
          coordinates = structure(
            c(
              32.192297, 32.192297, 32.683699, 32.683699,
              -0.147789, 0.585072, 0.585072, -0.147789
            ),
            .Dim = c(1L, 4L, 2L)
          )
        ),
        full_name = "Wakiso, Uganda",
        attributes = structure(list(), .Names = character(0)),
        id = "0092409a629e836c",
        name = "Wakiso"
      ),
      list(
        country_code = "US",
        url = "https://api.twitter.com/1.1/geo/id/080b8d8543aab399.json",
        country = "United States",
        place_type = "city",
        bounding_box = list(
          type = "Polygon",
          coordinates = structure(
            c(
              -93.399443, -93.399443, -93.203245, -93.203245,
              44.78542, 44.863519, 44.863519, 44.78542
            ),
            .Dim = c(1L, 4L, 2L)
          )
        ),
        full_name = "Bloomington, MN",
        attributes = structure(list(), .Names = character(0)),
        id = "080b8d8543aab399",
        name = "Bloomington"
      )
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -3L)
)

数据: 我的 foo 数据框有两列:column_a 是字母,column_b 是一个列表列,其中每个列表有 9 个项目。

library(tidyverse)

# Show the tibble: one character column and one list column.
print(foo)
#> # A tibble: 3 x 2
#>   column_a column_b        
#>   <chr>    <list>          
#> 1 a        <named list [9]>
#> 2 b        <named list [9]>
#> 3 c        <named list [9]>

# Inspect the nested structure of the first row only.
str(foo[1, ])
#> Classes 'tbl_df', 'tbl' and 'data.frame':    1 obs. of  2 variables:
#>  $ column_a: chr "a"
#>  $ column_b:List of 1
#>   ..$ :List of 9
#>   .. ..$ country_code: chr "US"
#>   .. ..$ url         : chr "https://api.twitter.com/1.1/geo/id/c3f37afa9efcf94b.json"
#>   .. ..$ country     : chr "United States"
#>   .. ..$ place_type  : chr "city"
#>   .. ..$ bounding_box:List of 2
#>   .. .. ..$ type       : chr "Polygon"
#>   .. .. ..$ coordinates: num [1, 1:4, 1:2] -97.9 -97.9 -97.6 -97.6 30.1 ...
#>   .. ..$ full_name   : chr "Austin, TX"
#>   .. ..$ attributes  : Named list()
#>   .. ..$ id          : chr "c3f37afa9efcf94b"
#>   .. ..$ name        : chr "Austin"

解决方案A:一种解决方案是列出列表中所有变量的名称,然后用 map_chr 逐个提取。

# Pull individual fields out of each list in `column_b`, one map_chr() call
# per field; every new field needs its own line here.
foo %>%
  mutate(
    country_code = map_chr(column_b, ~ .x[["country_code"]]),
    country = map_chr(column_b, ~ .x[["country"]])
  )
#> # A tibble: 3 x 4
#>   column_a column_b         country_code country      
#>   <chr>    <list>           <chr>        <chr>        
#> 1 a        <named list [9]> US           United States
#> 2 b        <named list [9]> UG           Uganda       
#> 3 c        <named list [9]> US           United States

解决方案B:按照此链接,解决方案是在 mutate 内调用两次 map 函数。但我没有成功:

# Attempted solution: nest two map calls inside mutate() to build a data
# frame per row, then unnest it. This is the version that does NOT work.
foo %>%
  mutate(repo_info = column_b %>%
    # Outer map(): `.x` is one element of column_b, i.e. one named list.
    map(~ .x %>%
      # NOTE(review): the inner map_df() iterates over the individual fields
      # of that list, so `[` is applied to each field value (e.g. "US")
      # rather than to the list itself. Indexing those values by name
      # appears to be what yields the all-NA result shown below.
      map_df(`[`, c("country_code", "country")))) %>%
  select(-column_b) %>%
  unnest()
#> # A tibble: 6 x 10
#>   column_a country_code url   country place_type bounding_box full_name
#>   <chr>    <chr>        <chr> <chr>   <chr>      <list>       <chr>    
#> 1 a        <NA>         <NA>  <NA>    <NA>       <NULL>       <NA>     
#> 2 a        <NA>         <NA>  <NA>    <NA>       <NULL>       <NA>     
#> 3 b        <NA>         <NA>  <NA>    <NA>       <NULL>       <NA>     
#> 4 b        <NA>         <NA>  <NA>    <NA>       <NULL>       <NA>     
#> 5 c        <NA>         <NA>  <NA>    <NA>       <NULL>       <NA>     
#> 6 c        <NA>         <NA>  <NA>    <NA>       <NULL>       <NA>     
#> # … with 3 more variables: attributes <list>, id <chr>, name <chr>

reprex 包(v0.3.0)于 2019 年 8 月 19 日创建

标签: r list dataframe dplyr purrr

解决方案


一种选择是先提取所需的列,将其转换为 tibble,然后执行 unnest:

library(dplyr)
library(purrr) # provides map(); it is not attached by dplyr or tidyr
library(tidyr)

# For each element of `column_b`, keep only the wanted fields with `[`
# (which returns a named list), convert that list to a one-row tibble, and
# then unnest the new list column so the fields become regular columns.
foo %>%
  mutate(
    out = map(column_b, ~ as_tibble(.x[c("country_code", "country")]))
  ) %>%
  unnest(out)
# A tibble: 3 x 4
#  column_a column_b         country_code country      
#  <chr>    <list>           <chr>        <chr>        
#1 a        <named list [9]> US           United States
#2 b        <named list [9]> UG           Uganda       
#3 c        <named list [9]> US           United States

如果我们需要提取完整的列

library(dplyr)
library(purrr) # provides map(); it is not attached by dplyr, tidyr or tibble
library(tidyr)
library(tibble)

# Extract every field without naming them one by one:
#   1. enframe() turns each named list into a (name, value) tibble,
#   2. unnest(out) stacks those tibbles, giving one row per field,
#   3. spread() widens them back to one column per field name,
#   4. the final unnest() flattens every resulting list column except the
#      ones that are genuinely nested (`column_b`, `attributes`,
#      `bounding_box`), which stay as list columns.
# NOTE: spread() is superseded by pivot_wider() in current tidyr. It is kept
# here because the printed result below shows the field columns in
# alphabetical order, which is what spread() produced.
foo %>%
  mutate(out = map(column_b, enframe)) %>%
  unnest(out) %>%
  spread(name, value) %>%
  unnest(setdiff(names(.), c("column_b", "attributes", "bounding_box")))
# A tibble: 3 x 11
#  column_a column_b      attributes     bounding_box    country    country_code full_name    id         name     place_type url                             
#  <chr>    <list>        <list>         <list>          <chr>      <chr>        <chr>        <chr>      <chr>    <chr>      <chr>                           
#1 a        <named list … <named list [… <named list [2… United St… US           Austin, TX   c3f37afa9… Austin   city       https://api.twitter.com/1.1/geo…
#2 b        <named list … <named list [… <named list [2… Uganda     UG           Wakiso, Uga… 0092409a6… Wakiso   admin      https://api.twitter.com/1.1/geo…
#3 c        <named list … <named list [… <named list [2… United St… US           Bloomington… 080b8d854… Bloomin… city       https://api.twitter.com/1.1/geo…

推荐阅读