首页 > 解决方案 > How to loop a script through a list of tibbles?

问题描述

I have a list of tibbles, dfl.

> head(dfl)
<list_of<
  tbl_df<
    x   : double
    y   : integer
    z   : double
    form: character
  >
>[6]>
[[1]]
# A tibble: 36 x 4
            x      y      z form  
        <dbl>  <int>  <dbl> <chr> 
 1  409100012 107403 0.005  107403
 2 1032400197 107403 0.05   107403
 3 3725600001 107403 0.033  107403
 4 4218200011 107403 0.036  107403
 5 4873700001 107403 0.0512 107403
 6 5305300007 107403 0.0075 107403
 7 6488100007 107403 0.036  107403
 8 7008700002 107403 0.001  107403
 9 7517400002 107403 0.05   107403
10 8265300001 107403 0.0074 107403
# ... with 26 more rows

[[2]]
# A tibble: 8 x 4
           x      y        z form  
       <dbl>  <int>    <dbl> <chr> 
1   50700005 128928 0.4      128928
2  145900103 128928 0.0285   128928
3  183900065 128928 0.5      128928
4  214400008 128928 0.1      128928
5  546400001 128928 0.129    128928
6  683600191 128928 0.5      128928
7 1032400049 128928 0.5      128928
8 7295600001 128928 0.000175 128928

I have a separate script I have written that takes one of these tibbles, df, as the input, runs it through several transformations / functions, creates a new, almost identical tibble called save with column save$cluster, and binds df with save.

What I need to do is run this script through my list of tibbles, dfl, store new column cluster in each of the individual tibbles within dfl, and then convert the list of tibbles into one tibble called df.

Here is a copy of the script I need to apply, within a function myfunction, and how I tried to apply it after the function.


myfunction <- function(x)
{
  save <- unlist(unique(df$x)) %>%
    as_tibble()
  
  df <- 
    pivot_wider(
      df,
      id_cols = x, 
      names_from = y, 
      values_from = z 
    )
  
  rows <- df$x 
  
  df <- df %>% select(-x)
  
  df[is.na(df)] <- 0
  
  row.names(df) <- 
  
  df <- scale(df) 
  
  # Dissimilarity matrix
  d <- dist(df, method = "euclidean") 
  
  # assess methods
  m <- c( "average", "single", "complete", "ward")
  names(m) <- c( "average", "single", "complete", "ward")
  
  # function to compute coefficient
  ac <- function(x) { 
    agnes(d, method = x)$ac
  }
  
  coeffs <- map_dbl(m, ac) %>% 
    as_tibble() %>%
    mutate(method = m) %>%
    filter(value == max(value))
  
  coeffs <- matrix(data = coeffs)
  
  method = coeffs[2,1]
  
  # Function to compute hierarchical cluster
  hc <- function(x) {
    agnes(d, method = method)
  }
  
  # compute hierarchical clustering with optimal method
  hc1 <- hc(method)
  
  # determine optimal clusters
  elbowplot <- fviz_nbclust(df, FUN = hcut, method = "wss")

  elbow <- ggplot_build(elbowplot)
  
  elbow <- elbow$data[[1]] %>%
    as_tibble() 
  
  elbow <- elbow %>%
    mutate(slope = if_else(
      elbow$x == min(elbow$x), -elbow$y/elbow$x,
      (elbow$y-lag(elbow$y)/(elbow$x-lag(elbow$x)))
    ))
  
  elbow <- elbow %>%
    mutate(lastslope = lag(elbow$slope)) %>%
    mutate(nextslope = if_else(
      elbow$x == max(elbow$x), elbow$slope, lead(elbow$slope)
    ))
  
  elbow <- elbow %>%
    mutate(slopedelta = elbow$slope - elbow$lastslope) %>%
    mutate(slopedelta = as.numeric(slopedelta)) %>%
    filter(x != 1) %>%
    filter(slopedelta <= 1 & slopedelta > 0) %>%
    slice_head() %>%
    select(x) 
  
  clustercount <- matrix(data = elbow)
  
  clusters = clustercount[1,1] 
  
  # Cut tree into 4 groups
  sub_grp <- cutree(hc1, k = clusters)
  
  # Number of members in each cluster
  p <- fviz_cluster(list(data = df, cluster = sub_grp))
  p
  
  save <- save %>%
    mutate(cluster = sub_grp)
  
  rm(clustercount, clusters, coeffs, elbow, elbowplot, hc1, method, p, d, m, rows, sub_grp, ac, hc)
  
df <- rbind(df, save2)
}

myfunction <- purrr::possibly(myfunction, NA)

purrr::map(dfl, ~myfunction(.x))

My script works on one individual dfl object, but I haven't been able to figure out how to apply it through my entire list of tibbles in the way I described.

I have tried different variations of map() and for loops, but am not having any luck. I think I am applying something incorrectly. Any thoughts?

Edit Here is a dput of dfl... the script will work on one individual tibble within dfl, such as this dataset: dataset <- dfl[[1]] %>% as_tibble()

> dput(head(dfl))
structure(list(structure(list(x = c(409100012, 1032400197, 3725600001, 
4218200011, 4873700001, 5305300007, 6488100007, 7008700002, 7517400002, 
8265300001, 8301900001, 8301900002, 8301900003, 8301900005, 8301900006, 
8313500001, 8534800002, 8555600001, 8555600002, 8620000001, 8620000002, 
8758300003, 8790700001, 8790700002, 8896500001, 8916000002, 8916000004, 
9085600001, 9085600002, 9085600003, 9179900001, 9208200001, 9441800001, 
9565600001, 9565600002, 9754300001), y = c(107403L, 107403L, 
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 
107403L, 107403L, 107403L, 107403L, 107403L, 107403L), z = c(0.005, 
0.05, 0.033, 0.036, 0.0512, 0.0075, 0.036, 0.001, 0.05, 0.0074, 
0.84, 0.0075, 0.05, 0.05, 0.0075, 0.0144, 0.033, 0.05, 0.0075, 
0.0084, 0.036, 0.005, 0.036, 0.05, 5e-04, 0.036, 0.02, 0.036, 
0.013, 0.005, 0.036, 0.0075, 0.01, 0.005, 0.05, 0.005), form = c("107403", 
"107403", "107403", "107403", "107403", "107403", "107403", "107403", 
"107403", "107403", "107403", "107403", "107403", "107403", "107403", 
"107403", "107403", "107403", "107403", "107403", "107403", "107403", 
"107403", "107403", "107403", "107403", "107403", "107403", "107403", 
"107403", "107403", "107403", "107403", "107403", "107403", "107403"
)), row.names = c(NA, -36L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(50700005, 145900103, 183900065, 214400008, 
546400001, 683600191, 1032400049, 7295600001), y = c(128928L, 
128928L, 128928L, 128928L, 128928L, 128928L, 128928L, 128928L
), z = c(0.4, 0.0285, 0.5, 0.1, 0.129, 0.5, 0.5, 0.000175), form = c("128928", 
"128928", "128928", "128928", "128928", "128928", "128928", "128928"
)), row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(125801401, 144800345, 170600168, 170600181, 
170600217, 170600235, 221400012, 221400013, 221400014, 221400015, 
337700025, 337700028, 337700029, 337700032, 337700034, 337700053, 
337700054, 337700073, 337700075, 337700076, 337700077, 343200058, 
343200090, 352500127, 387600158, 387600159, 518500447, 518500448, 
518500449, 518500450, 518500451, 518500466, 518500467, 573600090, 
573600094, 578500066, 578500067, 578500076, 578500078, 578500079, 
578500080, 578500081, 736400030, 736400104, 736400106, 736400107, 
761600065, 862200045, 862200049, 862200051, 862200057, 862200066, 
862200067, 862200078, 862200089, 862200091, 895900052, 1032400095, 
1530000026, 4126000041, 4154700013, 4229100003, 4530900043, 4533700006, 
4533700007, 4533700008, 4533700009, 4533700010, 4533700011, 4533700014, 
4533700015, 4533700016, 4604300027, 4604300028, 4604300029, 5499800009, 
5861600003, 5861600005, 5861600006, 6248100001, 6383800026, 6947000031, 
6968100036, 6968100042, 7170400001, 7177000005, 7357800001, 7465500019, 
7465500029, 8345100017, 8345100018, 8345100019, 8871400003, 8911000035, 
9005200001), y = c(13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 
13907L), z = c(0.98, 0.4, 0.428, 0.428, 0.4, 0.3, 0.453, 0.4, 
0.38, 0.43, 0.4, 0.45, 0.45, 0.4, 0.98, 0.98, 0.4, 0.43, 0.1, 
0.5, 0.98, 0.35, 0.99, 0.3218, 0.4, 0.4, 0.97, 0.97, 0.46, 0.46, 
0.4, 0.38, 0.43, 0.026, 0.017, 0.46, 0.46, 0.38, 0.97, 0.428, 
0.15, 0.4, 0.3218, 0.98, 0.98, 0.98, 0.038, 0.99, 0.4, 0.99, 
0.99, 0.45, 0.43, 0.99, 0.46, 0.45, 0.98, 0.98, 0.4, 0.312, 0.99, 
0.3218, 0.35, 0.223, 0.208, 0.888, 0.485, 0.104, 0.414, 0.676, 
0.333, 0.6899, 0.99, 0.99, 0.4, 0.35, 0.223, 0.468, 0.149, 0.99, 
0.4, 0.99, 0.99, 0.4, 0.4, 0.771, 0.99, 0.4, 0.4, 0.4, 0.43, 
0.46, 0.4, 0.4, 0.99), form = c("13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907", 
"13907", "13907", "13907", "13907", "13907", "13907", "13907"
)), row.names = c(NA, -95L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(1032400234, 1032400234, 1032400234, 
1032400234, 1032400234), y = c(21801L, 69149L, 69165L, 69166L, 
169101L), z = c(0.3, 0.0154, 0.0307, 0.0154, 0.041), form = c("169101", 
"169101", "169101", "169101", "169101")), row.names = c(NA, -5L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    x = c(67500055, 77700108, 77700133, 77700135, 77700137, 77700139, 
    104300134, 357300053, 357300054, 357300067, 357300070, 357300072, 
    357300078, 357300079, 357300093, 574100025, 581300127, 990200002, 
    1032400220, 3481000035, 3481000036, 3481000037, 5075700005, 
    6424000064, 6677700001, 6749600001, 6761900044, 7027100032, 
    7527700002, 8185700001, 9145200001, 9145200005, 9145200006, 
    9270800001, 9533700001, 1032400234, 1032400234, 1032400234, 
    1032400234, 1032400234), y = c(21801L, 21801L, 21801L, 21801L, 
    21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 
    21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 
    21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 
    21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 
    69149L, 69165L, 69166L, 169101L), z = c(0.025, 0.035, 0.0263, 
    0.025, 0.0263, 0.0278, 0.37, 0.045, 0.06, 0.015, 0.018, 0.045, 
    0.06, 0.045, 0.06, 0.08, 0.00667, 0.25, 0.06, 0.006, 0.006, 
    0.006, 0.06, 0.137, 0.94, 0.0625, 0.003, 0.06, 0.05, 0.045, 
    0.25, 0.002, 0.009, 0.0066, 0.015, 0.3, 0.0154, 0.0307, 0.0154, 
    0.041), form = c("21801", "21801", "21801", "21801", "21801", 
    "21801", "21801", "21801", "21801", "21801", "21801", "21801", 
    "21801", "21801", "21801", "21801", "21801", "21801", "21801", 
    "21801", "21801", "21801", "21801", "21801", "21801", "21801", 
    "21801", "21801", "21801", "21801", "21801", "21801", "21801", 
    "21801", "21801", "21801", "21801", "21801", "21801", "21801"
    )), row.names = c(NA, -40L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(46400699, 46400700, 46400701, 46400702, 
46400712, 46400715, 46400716, 46408142, 183900249, 183900251, 
183900252, 1032400207, 1032400222, 1032400223, 1070700067, 5248400005, 
7117300007, 7117300009, 8276000005, 8911000022, 9051100006, 9051100009, 
9092400009, 9251300001, 9251300002, 9251300003, 9251300005, 9251300006, 
9358500001, 9460200002, 9460200003, 46400699, 46400700, 46400701, 
46400702, 46400712, 46400715, 46400716, 46408142, 183900249, 
183900251, 183900252, 1032400207, 1032400222, 1032400223, 1070700067, 
5248400005, 7117300007, 7117300009, 8276000005, 8911000022, 9051100006, 
9051100009, 9092400009, 9251300001, 9251300002, 9251300003, 9251300005, 
9251300006, 9358500001, 9460200002, 9460200003, 183900227, 183900243, 
1032400185, 1032400188, 1032400209, 1032400221, 1032400235, 1070700068, 
1248700002, 5248400006, 7117300003, 7135500001, 7181400001, 8613000005, 
8911000021, 8911000024, 8987100001, 9051100004, 9460200001, 9460200004, 
9460200005, 183900227, 183900243, 1032400185, 1032400188, 1032400209, 
1032400221, 1032400235, 1070700068, 1248700002, 5248400006, 7117300003, 
7135500001, 7181400001, 8613000005, 8911000021, 8911000024, 8987100001, 
9051100004, 9460200001, 9460200004, 9460200005, 183900227, 183900243, 
1032400185, 1032400188, 1032400209, 1032400221, 1032400235, 1070700068, 
1248700002, 5248400006, 7117300003, 7135500001, 7181400001, 8613000005, 
8911000021, 8911000024, 8987100001, 9051100004, 9460200001, 9460200004, 
9460200005, 183900253, 1032400208, 1032400211, 1032400212, 1032400224, 
1032400229, 7117300004, 7117300005, 9051100005, 183900253, 1032400208, 
1032400211, 1032400212, 1032400224, 1032400229, 7117300004, 7117300005, 
9051100005, 1032400225, 1032400226, 1032400227, 1032400228, 6617100007, 
6617100107, 8987100002, 1032400225, 1032400226, 1032400227, 1032400228, 
6617100007, 6617100107, 8987100002), y = c(43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 43901L, 69105L, 69105L, 69105L, 69105L, 
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 
69105L, 69105L, 69105L, 43901L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 
69105L, 69105L, 69105L, 69105L, 69105L, 69149L, 69149L, 69149L, 
69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 
69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 
69149L, 69149L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 69149L, 69149L, 69149L, 69149L, 69149L, 
69149L, 69149L, 69149L, 69149L, 43901L, 43901L, 43901L, 43901L, 
43901L, 43901L, 43901L, 69175L, 69175L, 69175L, 69175L, 69175L, 
69175L, 69175L), z = c(0.14, 0.14, 0.425, 0.425, 0.425, 0.425, 
0.14, 0.14, 0.14, 0.05, 0.425, 0.4, 0.05, 0.1, 0.14, 0.14, 0.14, 
0.425, 0.14, 0.14, 0.14, 0.05, 0.05, 0.12, 0.25, 0.048, 0.05, 
0.14, 0.14, 0.08, 0.05, 0.025, 0.025, 0.075, 0.075, 0.075, 0.075, 
0.025, 0.025, 0.025, 0.1, 0.075, 0.1, 0.1, 0.15, 0.025, 0.03, 
0.025, 0.075, 0.025, 0.025, 0.025, 0.1, 0.1, 0.03, 0.1, 0.02, 
0.1, 0.025, 0.025, 0.02, 0.1, 0.267, 0.12, 0.257, 0.4, 0.125, 
0.257, 0.05, 0.257, 0.125, 0.25, 0.257, 0.10725, 0.1072, 0.257, 
0.12, 0.2525, 0.12, 0.125, 0.12, 0.08, 0.257, 0.0531, 0.012, 
0.05, 0.04, 0.012, 0.05, 0.02, 0.05, 0.012, 0.03, 0.05, 0.1706, 
0.1706, 0.075, 0.012, 0.0506, 0.01, 0.012, 0.018, 0.008, 0.05, 
0.0797, 0.018, 0.075, 0.06, 0.018, 0.075, 0.03, 0.075, 0.018, 
0.03, 0.075, 0.078, 0.078, 0.05, 0.018, 0.0759, 0.014, 0.018, 
0.012, 0.012, 0.075, 0.24, 0.125, 0.257, 0.24, 0.24, 0.257, 0.24, 
0.24, 0.125, 0.06, 0.03, 0.125, 0.06, 0.06, 0.125, 0.06, 0.06, 
0.03, 0.125, 0.125, 0.257, 0.257, 0.07, 0.00027, 0.125, 0.025, 
0.025, 0.125, 0.125, 0.26, 0.00102, 0.025), form = c("43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901", "43901", "43901", "43901", "43901", "43901", 
"43901", "43901")), row.names = c(NA, -157L), class = c("tbl_df", 
"tbl", "data.frame"))), ptype = structure(list(x = numeric(0), 
    y = integer(0), z = numeric(0), form = character(0)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = integer(0)), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))

标签: riterationpurrr

解决方案


也许,尝试map2_df使用map-

library(purrr)
result <- map2_df(dfl, map(dfl, myfunction), cbind)

推荐阅读