r - How to loop a script through a list of tibbles?
问题描述
I have a list of tibbles, dfl
.
> head(dfl)
<list_of<
tbl_df<
x : double
y : integer
z : double
form: character
>
>[6]>
[[1]]
# A tibble: 36 x 4
x y z form
<dbl> <int> <dbl> <chr>
1 409100012 107403 0.005 107403
2 1032400197 107403 0.05 107403
3 3725600001 107403 0.033 107403
4 4218200011 107403 0.036 107403
5 4873700001 107403 0.0512 107403
6 5305300007 107403 0.0075 107403
7 6488100007 107403 0.036 107403
8 7008700002 107403 0.001 107403
9 7517400002 107403 0.05 107403
10 8265300001 107403 0.0074 107403
# ... with 26 more rows
[[2]]
# A tibble: 8 x 4
x y z form
<dbl> <int> <dbl> <chr>
1 50700005 128928 0.4 128928
2 145900103 128928 0.0285 128928
3 183900065 128928 0.5 128928
4 214400008 128928 0.1 128928
5 546400001 128928 0.129 128928
6 683600191 128928 0.5 128928
7 1032400049 128928 0.5 128928
8 7295600001 128928 0.000175 128928
I have a separate script I have written that takes one of these tibbles, df
, as the input, runs it through several transformations / functions, creates a new, almost identical tibble called save
with column save$cluster
, and binds df
with save
.
What I need to do is run this script through my list of tibbles, dfl
, store new column cluster
in each of the individual tibbles within dfl
, and then convert the list of tibbles into one tibble called df
.
Here is a copy of the script I need to apply, within a function myfunction
, and how I tried to apply it after the function.
myfunction <- function(x)
{
save <- unlist(unique(df$x)) %>%
as_tibble()
df <-
pivot_wider(
df,
id_cols = x,
names_from = y,
values_from = z
)
rows <- df$x
df <- df %>% select(-x)
df[is.na(df)] <- 0
row.names(df) <-
df <- scale(df)
# Dissimilarity matrix
d <- dist(df, method = "euclidean")
# assess methods
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
# function to compute coefficient
ac <- function(x) {
agnes(d, method = x)$ac
}
coeffs <- map_dbl(m, ac) %>%
as_tibble() %>%
mutate(method = m) %>%
filter(value == max(value))
coeffs <- matrix(data = coeffs)
method = coeffs[2,1]
# Function to compute hierarchical cluster
hc <- function(x) {
agnes(d, method = method)
}
# compute hierarchical clustering with optimal method
hc1 <- hc(method)
# determine optimal clusters
elbowplot <- fviz_nbclust(df, FUN = hcut, method = "wss")
elbow <- ggplot_build(elbowplot)
elbow <- elbow$data[[1]] %>%
as_tibble()
elbow <- elbow %>%
mutate(slope = if_else(
elbow$x == min(elbow$x), -elbow$y/elbow$x,
(elbow$y-lag(elbow$y)/(elbow$x-lag(elbow$x)))
))
elbow <- elbow %>%
mutate(lastslope = lag(elbow$slope)) %>%
mutate(nextslope = if_else(
elbow$x == max(elbow$x), elbow$slope, lead(elbow$slope)
))
elbow <- elbow %>%
mutate(slopedelta = elbow$slope - elbow$lastslope) %>%
mutate(slopedelta = as.numeric(slopedelta)) %>%
filter(x != 1) %>%
filter(slopedelta <= 1 & slopedelta > 0) %>%
slice_head() %>%
select(x)
clustercount <- matrix(data = elbow)
clusters = clustercount[1,1]
# Cut tree into 4 groups
sub_grp <- cutree(hc1, k = clusters)
# Number of members in each cluster
p <- fviz_cluster(list(data = df, cluster = sub_grp))
p
save <- save %>%
mutate(cluster = sub_grp)
rm(clustercount, clusters, coeffs, elbow, elbowplot, hc1, method, p, d, m, rows, sub_grp, ac, hc)
df <- rbind(df, save2)
}
myfunction <- purrr::possibly(myfunction, NA)
purrr::map(dfl, ~myfunction(.x))
My script works on one individual dfl
object, but I haven't been able to figure out how to apply it through my entire list of tibbles in the way I described.
I have tried different variations of map()
and for
loops, but am not having any luck. I think I am applying something incorrectly. Any thoughts?
Edit Here is a dput of dfl... the script will work on one individual tibble within dfl, such as this dataset: dataset <- dfl[[1]] %>% as_tibble()
> dput(head(dfl))
structure(list(structure(list(x = c(409100012, 1032400197, 3725600001,
4218200011, 4873700001, 5305300007, 6488100007, 7008700002, 7517400002,
8265300001, 8301900001, 8301900002, 8301900003, 8301900005, 8301900006,
8313500001, 8534800002, 8555600001, 8555600002, 8620000001, 8620000002,
8758300003, 8790700001, 8790700002, 8896500001, 8916000002, 8916000004,
9085600001, 9085600002, 9085600003, 9179900001, 9208200001, 9441800001,
9565600001, 9565600002, 9754300001), y = c(107403L, 107403L,
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L,
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L,
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L,
107403L, 107403L, 107403L, 107403L, 107403L, 107403L, 107403L,
107403L, 107403L, 107403L, 107403L, 107403L, 107403L), z = c(0.005,
0.05, 0.033, 0.036, 0.0512, 0.0075, 0.036, 0.001, 0.05, 0.0074,
0.84, 0.0075, 0.05, 0.05, 0.0075, 0.0144, 0.033, 0.05, 0.0075,
0.0084, 0.036, 0.005, 0.036, 0.05, 5e-04, 0.036, 0.02, 0.036,
0.013, 0.005, 0.036, 0.0075, 0.01, 0.005, 0.05, 0.005), form = c("107403",
"107403", "107403", "107403", "107403", "107403", "107403", "107403",
"107403", "107403", "107403", "107403", "107403", "107403", "107403",
"107403", "107403", "107403", "107403", "107403", "107403", "107403",
"107403", "107403", "107403", "107403", "107403", "107403", "107403",
"107403", "107403", "107403", "107403", "107403", "107403", "107403"
)), row.names = c(NA, -36L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(50700005, 145900103, 183900065, 214400008,
546400001, 683600191, 1032400049, 7295600001), y = c(128928L,
128928L, 128928L, 128928L, 128928L, 128928L, 128928L, 128928L
), z = c(0.4, 0.0285, 0.5, 0.1, 0.129, 0.5, 0.5, 0.000175), form = c("128928",
"128928", "128928", "128928", "128928", "128928", "128928", "128928"
)), row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(125801401, 144800345, 170600168, 170600181,
170600217, 170600235, 221400012, 221400013, 221400014, 221400015,
337700025, 337700028, 337700029, 337700032, 337700034, 337700053,
337700054, 337700073, 337700075, 337700076, 337700077, 343200058,
343200090, 352500127, 387600158, 387600159, 518500447, 518500448,
518500449, 518500450, 518500451, 518500466, 518500467, 573600090,
573600094, 578500066, 578500067, 578500076, 578500078, 578500079,
578500080, 578500081, 736400030, 736400104, 736400106, 736400107,
761600065, 862200045, 862200049, 862200051, 862200057, 862200066,
862200067, 862200078, 862200089, 862200091, 895900052, 1032400095,
1530000026, 4126000041, 4154700013, 4229100003, 4530900043, 4533700006,
4533700007, 4533700008, 4533700009, 4533700010, 4533700011, 4533700014,
4533700015, 4533700016, 4604300027, 4604300028, 4604300029, 5499800009,
5861600003, 5861600005, 5861600006, 6248100001, 6383800026, 6947000031,
6968100036, 6968100042, 7170400001, 7177000005, 7357800001, 7465500019,
7465500029, 8345100017, 8345100018, 8345100019, 8871400003, 8911000035,
9005200001), y = c(13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L, 13907L,
13907L), z = c(0.98, 0.4, 0.428, 0.428, 0.4, 0.3, 0.453, 0.4,
0.38, 0.43, 0.4, 0.45, 0.45, 0.4, 0.98, 0.98, 0.4, 0.43, 0.1,
0.5, 0.98, 0.35, 0.99, 0.3218, 0.4, 0.4, 0.97, 0.97, 0.46, 0.46,
0.4, 0.38, 0.43, 0.026, 0.017, 0.46, 0.46, 0.38, 0.97, 0.428,
0.15, 0.4, 0.3218, 0.98, 0.98, 0.98, 0.038, 0.99, 0.4, 0.99,
0.99, 0.45, 0.43, 0.99, 0.46, 0.45, 0.98, 0.98, 0.4, 0.312, 0.99,
0.3218, 0.35, 0.223, 0.208, 0.888, 0.485, 0.104, 0.414, 0.676,
0.333, 0.6899, 0.99, 0.99, 0.4, 0.35, 0.223, 0.468, 0.149, 0.99,
0.4, 0.99, 0.99, 0.4, 0.4, 0.771, 0.99, 0.4, 0.4, 0.4, 0.43,
0.46, 0.4, 0.4, 0.99), form = c("13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907",
"13907", "13907", "13907", "13907", "13907", "13907", "13907"
)), row.names = c(NA, -95L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(1032400234, 1032400234, 1032400234,
1032400234, 1032400234), y = c(21801L, 69149L, 69165L, 69166L,
169101L), z = c(0.3, 0.0154, 0.0307, 0.0154, 0.041), form = c("169101",
"169101", "169101", "169101", "169101")), row.names = c(NA, -5L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
x = c(67500055, 77700108, 77700133, 77700135, 77700137, 77700139,
104300134, 357300053, 357300054, 357300067, 357300070, 357300072,
357300078, 357300079, 357300093, 574100025, 581300127, 990200002,
1032400220, 3481000035, 3481000036, 3481000037, 5075700005,
6424000064, 6677700001, 6749600001, 6761900044, 7027100032,
7527700002, 8185700001, 9145200001, 9145200005, 9145200006,
9270800001, 9533700001, 1032400234, 1032400234, 1032400234,
1032400234, 1032400234), y = c(21801L, 21801L, 21801L, 21801L,
21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L,
21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L,
21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L,
21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L, 21801L,
69149L, 69165L, 69166L, 169101L), z = c(0.025, 0.035, 0.0263,
0.025, 0.0263, 0.0278, 0.37, 0.045, 0.06, 0.015, 0.018, 0.045,
0.06, 0.045, 0.06, 0.08, 0.00667, 0.25, 0.06, 0.006, 0.006,
0.006, 0.06, 0.137, 0.94, 0.0625, 0.003, 0.06, 0.05, 0.045,
0.25, 0.002, 0.009, 0.0066, 0.015, 0.3, 0.0154, 0.0307, 0.0154,
0.041), form = c("21801", "21801", "21801", "21801", "21801",
"21801", "21801", "21801", "21801", "21801", "21801", "21801",
"21801", "21801", "21801", "21801", "21801", "21801", "21801",
"21801", "21801", "21801", "21801", "21801", "21801", "21801",
"21801", "21801", "21801", "21801", "21801", "21801", "21801",
"21801", "21801", "21801", "21801", "21801", "21801", "21801"
)), row.names = c(NA, -40L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(x = c(46400699, 46400700, 46400701, 46400702,
46400712, 46400715, 46400716, 46408142, 183900249, 183900251,
183900252, 1032400207, 1032400222, 1032400223, 1070700067, 5248400005,
7117300007, 7117300009, 8276000005, 8911000022, 9051100006, 9051100009,
9092400009, 9251300001, 9251300002, 9251300003, 9251300005, 9251300006,
9358500001, 9460200002, 9460200003, 46400699, 46400700, 46400701,
46400702, 46400712, 46400715, 46400716, 46408142, 183900249,
183900251, 183900252, 1032400207, 1032400222, 1032400223, 1070700067,
5248400005, 7117300007, 7117300009, 8276000005, 8911000022, 9051100006,
9051100009, 9092400009, 9251300001, 9251300002, 9251300003, 9251300005,
9251300006, 9358500001, 9460200002, 9460200003, 183900227, 183900243,
1032400185, 1032400188, 1032400209, 1032400221, 1032400235, 1070700068,
1248700002, 5248400006, 7117300003, 7135500001, 7181400001, 8613000005,
8911000021, 8911000024, 8987100001, 9051100004, 9460200001, 9460200004,
9460200005, 183900227, 183900243, 1032400185, 1032400188, 1032400209,
1032400221, 1032400235, 1070700068, 1248700002, 5248400006, 7117300003,
7135500001, 7181400001, 8613000005, 8911000021, 8911000024, 8987100001,
9051100004, 9460200001, 9460200004, 9460200005, 183900227, 183900243,
1032400185, 1032400188, 1032400209, 1032400221, 1032400235, 1070700068,
1248700002, 5248400006, 7117300003, 7135500001, 7181400001, 8613000005,
8911000021, 8911000024, 8987100001, 9051100004, 9460200001, 9460200004,
9460200005, 183900253, 1032400208, 1032400211, 1032400212, 1032400224,
1032400229, 7117300004, 7117300005, 9051100005, 183900253, 1032400208,
1032400211, 1032400212, 1032400224, 1032400229, 7117300004, 7117300005,
9051100005, 1032400225, 1032400226, 1032400227, 1032400228, 6617100007,
6617100107, 8987100002, 1032400225, 1032400226, 1032400227, 1032400228,
6617100007, 6617100107, 8987100002), y = c(43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 43901L, 69105L, 69105L, 69105L, 69105L,
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L,
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L,
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L,
69105L, 69105L, 69105L, 43901L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L,
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L,
69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L, 69105L,
69105L, 69105L, 69105L, 69105L, 69105L, 69149L, 69149L, 69149L,
69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L,
69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L, 69149L,
69149L, 69149L, 43901L, 43901L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 69149L, 69149L, 69149L, 69149L, 69149L,
69149L, 69149L, 69149L, 69149L, 43901L, 43901L, 43901L, 43901L,
43901L, 43901L, 43901L, 69175L, 69175L, 69175L, 69175L, 69175L,
69175L, 69175L), z = c(0.14, 0.14, 0.425, 0.425, 0.425, 0.425,
0.14, 0.14, 0.14, 0.05, 0.425, 0.4, 0.05, 0.1, 0.14, 0.14, 0.14,
0.425, 0.14, 0.14, 0.14, 0.05, 0.05, 0.12, 0.25, 0.048, 0.05,
0.14, 0.14, 0.08, 0.05, 0.025, 0.025, 0.075, 0.075, 0.075, 0.075,
0.025, 0.025, 0.025, 0.1, 0.075, 0.1, 0.1, 0.15, 0.025, 0.03,
0.025, 0.075, 0.025, 0.025, 0.025, 0.1, 0.1, 0.03, 0.1, 0.02,
0.1, 0.025, 0.025, 0.02, 0.1, 0.267, 0.12, 0.257, 0.4, 0.125,
0.257, 0.05, 0.257, 0.125, 0.25, 0.257, 0.10725, 0.1072, 0.257,
0.12, 0.2525, 0.12, 0.125, 0.12, 0.08, 0.257, 0.0531, 0.012,
0.05, 0.04, 0.012, 0.05, 0.02, 0.05, 0.012, 0.03, 0.05, 0.1706,
0.1706, 0.075, 0.012, 0.0506, 0.01, 0.012, 0.018, 0.008, 0.05,
0.0797, 0.018, 0.075, 0.06, 0.018, 0.075, 0.03, 0.075, 0.018,
0.03, 0.075, 0.078, 0.078, 0.05, 0.018, 0.0759, 0.014, 0.018,
0.012, 0.012, 0.075, 0.24, 0.125, 0.257, 0.24, 0.24, 0.257, 0.24,
0.24, 0.125, 0.06, 0.03, 0.125, 0.06, 0.06, 0.125, 0.06, 0.06,
0.03, 0.125, 0.125, 0.257, 0.257, 0.07, 0.00027, 0.125, 0.025,
0.025, 0.125, 0.125, 0.26, 0.00102, 0.025), form = c("43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901", "43901", "43901", "43901", "43901", "43901",
"43901", "43901")), row.names = c(NA, -157L), class = c("tbl_df",
"tbl", "data.frame"))), ptype = structure(list(x = numeric(0),
y = integer(0), z = numeric(0), form = character(0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = integer(0)), class = c("vctrs_list_of",
"vctrs_vctr", "list"))
解决方案
也许,尝试map2_df
使用map
-
library(purrr)
result <- map2_df(dfl, map(dfl, myfunction), cbind)
推荐阅读
- swift - 尝试更新 cocoapods 时出现权限错误
- c - 使用 C 中的运算符从无符号整数中获取 6 个最高有效位的最佳方法?
- go - 使用兼容接口进行恐慌接口转换
- node.js - 尝试从位置 17 的日期时间读取日期时间抛出错误
- c - 如何使用 C 中的 write() 函数将浮点数转换为字符串以打印它
- python - 在 Pandas Dataframe 中合并 2 列数据
- machine-learning - 如果不是为了提高速度,那么减少 FLOP 和参数大小的目的是什么?
- reactjs - 我需要在我的项目中调整图像的大小 react + gatsby
- javascript - 粘性导航不起作用,这是怎么回事?
- python - 为什么我的上传目录相对于项目目录,但 Django 试图从与应用程序相关的路径中获取它?