Как прокрутить скрипт по списку тибблов и объединить их в один тиббл?

#r #loops #iteration #purrr

Вопрос:

ПРИМЕЧАНИЕ: см. Раздел пропускная способность и сценарий в нижнем разделе

Краткое описание проблемы

У меня есть сценарий , который я написал , который берет тиббл datainput , преобразует его в тиббл df , выполняет ряд функций / вычислений, построенных из df объекта, и в конечном итоге присоединяется к исходному datainput тибблу, df чтобы создать новый столбец под названием datainput$cluster

Вот как datainput это выглядит:

 > head(datainput)
# A tibble: 6 x 4
           p f          x g         
       <dbl> <chr>  <dbl> <chr>     
1  409100012 107403 0.005 107403   x
2  409100012 x      0.995 107403   x
3 1032400197 107403 0.05  107403   x
4 1032400197 x      0.95  107403   x
5 3725600001 107403 0.033 107403   x
6 3725600001 x      0.967 107403   x
 

И вот как datainput это выглядит, после применения приведенного выше сценария:

 > head(datainput)
# A tibble: 6 x 5
           p f          x g          cluster
       <dbl> <chr>  <dbl> <chr>        <int>
1  409100012 107403 0.005 107403   x       1
2  409100012 x      0.995 107403   x       1
3 1032400197 107403 0.05  107403   x       2
4 1032400197 x      0.95  107403   x       2
5 3725600001 107403 0.033 107403   x       2
6 3725600001 x      0.967 107403   x       2
 

Фактическая проблема , с которой я сталкиваюсь, заключается в том, что мне нужно выяснить, как применить этот сценарий не к одному тибблу в качестве datainput , а к списку тибблов под названием dfl . Я перепробовал множество вариаций лапли и т. Д., Но мне не везет.

Я думаю, что моя проблема заключается в том, как я сохраняю свой сценарий как функцию.

Может ли кто-нибудь дать какие-либо указания относительно того, как я могу применить свой скрипт к списку тибблов в dfl объекте, а затем преобразовать dfl объект в один тиббл с добавлением нового столбца?

dпутс

datainput :

 structure(list(p = c(409100012, 409100012, 1032400197, 1032400197, 
3725600001, 3725600001, 4218200011, 4218200011, 4873700001, 4873700001, 
5305300007, 5305300007, 6488100007, 6488100007, 7008700002, 7008700002, 
7517400002, 7517400002, 8265300001, 8265300001, 8301900001, 8301900001, 
8301900002, 8301900002, 8301900003, 8301900003, 8301900005, 8301900005, 
8301900006, 8301900006, 8313500001, 8313500001, 8534800002, 8534800002, 
8555600001, 8555600001, 8555600002, 8555600002, 8620000001, 8620000001, 
8620000002, 8620000002, 8758300003, 8758300003, 8790700001, 8790700001, 
8790700002, 8790700002, 8896500001, 8896500001, 8916000002, 8916000002, 
8916000004, 8916000004, 9085600001, 9085600001, 9085600002, 9085600002, 
9085600003, 9085600003, 9179900001, 9179900001, 9208200001, 9208200001, 
9441800001, 9441800001, 9565600001, 9565600001, 9565600002, 9565600002, 
9754300001, 9754300001), f = c("107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x"), x = c(0.005, 0.995, 0.05, 
0.95, 0.033, 0.967, 0.036, 0.964, 0.0512, 0.9488, 0.0075, 0.9925, 
0.036, 0.964, 0.001, 0.999, 0.05, 0.95, 0.0074, 0.9926, 0.84, 
0.16, 0.0075, 0.9925, 0.05, 0.95, 0.05, 0.95, 0.0075, 0.9925, 
0.0144, 0.9856, 0.033, 0.967, 0.05, 0.95, 0.0075, 0.9925, 0.0084, 
0.9916, 0.036, 0.964, 0.005, 0.995, 0.036, 0.964, 0.05, 0.95, 
0.0005, 0.9995, 0.036, 0.964, 0.02, 0.98, 0.036, 0.964, 0.013, 
0.987, 0.005, 0.995, 0.036, 0.964, 0.0075, 0.9925, 0.01, 0.99, 
0.005, 0.995, 0.05, 0.95, 0.005, 0.995), g = c("107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x")), row.names = c(NA, -72L), class = c("tbl_df", 
"tbl", "data.frame"))
 

script :

 # transform data
df <- 
  pivot_wider( 
    datainput,
    id_cols = "p", 
    names_from = "f", 
    values_from = "x"
  )

rows <- df$p

df <- df %>% select(-p) 

df[is.na(df)] <- 0

row.names(df) <- rows

df <- scale(df)

# compute dissimilarity matrix
d <- dist(df, method = "euclidean")

# store method names in m
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")

# function to compute coefficients
ac <- function(x) { 
  agnes(d, method = x)$ac
}

# choose best method by coefficient, store in method
coeffs <- map_dbl(m, ac) %>% #
  as_tibble %>%
  mutate(method = m) %>%
  filter(value == max(value))

coeffs <- matrix(data = coeffs)

method = coeffs[2,1]

# Function to compute hierarchical clustering using d and method
hc <- function(x) {
  agnes(d, method = method)
}

# compute hierarchical clustering with optimal method
hc1 <- hc(method)

# determine optimal clusters number by slopes of elbow plot
elbowplot <- fviz_nbclust(df, FUN = hcut, method = "wss")

elbow <- ggplot_build(elbowplot)

elbow <- elbow$data[[1]] %>%
  as_tibble() 

elbow <- elbow %>%
  mutate(slope = if_else(
    elbow$x == min(elbow$x), elbow$y/elbow$x,
    -(elbow$y-lag(elbow$y)/(elbow$x-lag(elbow$x)))
  ))

elbow <- elbow %>%
  mutate(lastslope = if_else(
    x == 1, slope, lag(elbow$slope)
    )) %>%
  mutate(nextslope = if_else(
    elbow$x == max(elbow$x), elbow$slope, lead(elbow$slope)
  )) %>%
  mutate(slopedelta = as.numeric(lastslope - slope)) %>%
  arrange(-slopedelta) %>%
  slice_head() %>%
  select(x)

clusters <- matrix(data = elbow)

clusters = clusters[1,1]

# Cut dendrogram by clusters, store in sub_grp
sub_grp <- cutree(hc1, k = clusters)

# store cluster value as column called cluster
df <- df %>%
  as_tibble()

row.names(df) <- rows

df <- df %>%
  rownames_to_column(var = "p") %>%
  mutate(cluster = sub_grp) %>%
  select(p, cluster) %>%
  mutate(p = as.double(p))

datainput <-
  left_join(datainput, df)

# rm unneccesary things
rm(clusters, coeffs, elbow, elbowplot, hc1, method, d, m, rows, sub_grp, ac, hc, df)
 

dfl :

 structure(list(structure(list(p = c(409100012, 409100012, 1032400197, 
1032400197, 3725600001, 3725600001, 4218200011, 4218200011, 4873700001, 
4873700001, 5305300007, 5305300007, 6488100007, 6488100007, 7008700002, 
7008700002, 7517400002, 7517400002, 8265300001, 8265300001, 8301900001, 
8301900001, 8301900002, 8301900002, 8301900003, 8301900003, 8301900005, 
8301900005, 8301900006, 8301900006, 8313500001, 8313500001, 8534800002, 
8534800002, 8555600001, 8555600001, 8555600002, 8555600002, 8620000001, 
8620000001, 8620000002, 8620000002, 8758300003, 8758300003, 8790700001, 
8790700001, 8790700002, 8790700002, 8896500001, 8896500001, 8916000002, 
8916000002, 8916000004, 8916000004, 9085600001, 9085600001, 9085600002, 
9085600002, 9085600003, 9085600003, 9179900001, 9179900001, 9208200001, 
9208200001, 9441800001, 9441800001, 9565600001, 9565600001, 9565600002, 
9565600002, 9754300001, 9754300001), f = c("107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403", 
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x", 
"107403", "x", "107403", "x", "107403", "x"), x = c(0.005, 0.995, 
0.05, 0.95, 0.033, 0.967, 0.036, 0.964, 0.0512, 0.9488, 0.0075, 
0.9925, 0.036, 0.964, 0.001, 0.999, 0.05, 0.95, 0.0074, 0.9926, 
0.84, 0.16, 0.0075, 0.9925, 0.05, 0.95, 0.05, 0.95, 0.0075, 0.9925, 
0.0144, 0.9856, 0.033, 0.967, 0.05, 0.95, 0.0075, 0.9925, 0.0084, 
0.9916, 0.036, 0.964, 0.005, 0.995, 0.036, 0.964, 0.05, 0.95, 
0.0005, 0.9995, 0.036, 0.964, 0.02, 0.98, 0.036, 0.964, 0.013, 
0.987, 0.005, 0.995, 0.036, 0.964, 0.0075, 0.9925, 0.01, 0.99, 
0.005, 0.995, 0.05, 0.95, 0.005, 0.995), g = c("107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x", "107403   x", "107403   x", "107403   x", "107403   x", 
"107403   x")), row.names = c(NA, -72L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(p = c(50700005, 50700005, 
145900103, 145900103, 183900065, 183900065, 214400008, 214400008, 
546400001, 546400001, 683600191, 683600191, 1032400049, 1032400049, 
7295600001, 7295600001), f = c("128928", "x", "128928", "x", 
"128928", "x", "128928", "x", "128928", "x", "128928", "x", "128928", 
"x", "128928", "x"), x = c(0.4, 0.6, 0.0285, 0.9715, 0.5, 0.5, 
0.1, 0.9, 0.129, 0.871, 0.5, 0.5, 0.5, 0.5, 0.000175, 0.999825
), g = c("128928   x", "128928   x", "128928   x", "128928   x", 
"128928   x", "128928   x", "128928   x", "128928   x", "128928   x", 
"128928   x", "128928   x", "128928   x", "128928   x", "128928   x", 
"128928   x", "128928   x")), row.names = c(NA, -16L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(p = c(125801401, 125801401, 
144800345, 144800345, 170600168, 170600168, 170600181, 170600181, 
170600217, 170600217, 170600235, 170600235, 221400012, 221400012, 
221400013, 221400013, 221400014, 221400014, 221400015, 221400015, 
337700025, 337700025, 337700028, 337700028, 337700029, 337700029, 
337700032, 337700032, 337700034, 337700034, 337700053, 337700053, 
337700054, 337700054, 337700073, 337700073, 337700075, 337700075, 
337700076, 337700076, 337700077, 337700077, 343200058, 343200058, 
343200090, 343200090, 352500127, 352500127, 387600158, 387600158, 
387600159, 387600159, 518500447, 518500447, 518500448, 518500448, 
518500449, 518500449, 518500450, 518500450, 518500451, 518500451, 
518500466, 518500466, 518500467, 518500467, 573600090, 573600090, 
573600094, 573600094, 578500066, 578500066, 578500067, 578500067, 
578500076, 578500076, 578500078, 578500078, 578500079, 578500079, 
578500080, 578500080, 578500081, 578500081, 736400030, 736400030, 
736400104, 736400104, 736400106, 736400106, 736400107, 736400107, 
761600065, 761600065, 862200045, 862200045, 862200049, 862200049, 
862200051, 862200051, 862200057, 862200057, 862200066, 862200066, 
862200067, 862200067, 862200078, 862200078, 862200089, 862200089, 
862200091, 862200091, 895900052, 895900052, 1032400095, 1032400095, 
1530000026, 1530000026, 4126000041, 4126000041, 4154700013, 4154700013, 
4229100003, 4229100003, 4530900043, 4530900043, 4533700006, 4533700006, 
4533700007, 4533700007, 4533700008, 4533700008, 4533700009, 4533700009, 
4533700010, 4533700010, 4533700011, 4533700011, 4533700014, 4533700014, 
4533700015, 4533700015, 4533700016, 4533700016, 4604300027, 4604300027, 
4604300028, 4604300028, 4604300029, 4604300029, 5499800009, 5499800009, 
5861600003, 5861600003, 5861600005, 5861600005, 5861600006, 5861600006, 
6248100001, 6248100001, 6383800026, 6383800026, 6947000031, 6947000031, 
6968100036, 6968100036, 6968100042, 6968100042, 7170400001, 7170400001, 
7177000005, 7177000005, 7357800001, 7357800001, 7465500019, 7465500019, 
7465500029, 7465500029, 8345100017, 8345100017, 8345100018, 8345100018, 
8345100019, 8345100019, 8871400003, 8871400003, 8911000035, 8911000035, 
9005200001, 9005200001), f = c("13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x", 
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907", 
"x", "13907", "x", "13907", "x"), x = c(0.98, 0.02, 0.4, 0.6, 
0.428, 0.572, 0.428, 0.572, 0.4, 0.6, 0.3, 0.7, 0.453, 0.547, 
0.4, 0.6, 0.38, 0.62, 0.43, 0.57, 0.4, 0.6, 0.45, 0.55, 0.45, 
0.55, 0.4, 0.6, 0.98, 0.02, 0.98, 0.02, 0.4, 0.6, 0.43, 0.57, 
0.1, 0.9, 0.5, 0.5, 0.98, 0.02, 0.35, 0.65, 0.99, 0.01, 0.3218, 
0.6782, 0.4, 0.6, 0.4, 0.6, 0.97, 0.03, 0.97, 0.03, 0.46, 0.54, 
0.46, 0.54, 0.4, 0.6, 0.38, 0.62, 0.43, 0.57, 0.026, 0.974, 0.017, 
0.983, 0.46, 0.54, 0.46, 0.54, 0.38, 0.62, 0.97, 0.03, 0.428, 
0.572, 0.15, 0.85, 0.4, 0.6, 0.3218, 0.6782, 0.98, 0.02, 0.98, 
0.02, 0.98, 0.02, 0.038, 0.962, 0.99, 0.01, 0.4, 0.6, 0.99, 0.01, 
0.99, 0.01, 0.45, 0.55, 0.43, 0.57, 0.99, 0.01, 0.46, 0.54, 0.45, 
0.55, 0.98, 0.02, 0.98, 0.02, 0.4, 0.6, 0.312, 0.688, 0.99, 0.01, 
0.3218, 0.6782, 0.35, 0.65, 0.223, 0.777, 0.208, 0.792, 0.888, 
0.112, 0.485, 0.515, 0.104, 0.896, 0.414, 0.586, 0.676, 0.324, 
0.333, 0.667, 0.6899, 0.3101, 0.99, 0.01, 0.99, 0.01, 0.4, 0.6, 
0.35, 0.65, 0.223, 0.777, 0.468, 0.532, 0.149, 0.851, 0.99, 0.01, 
0.4, 0.6, 0.99, 0.01, 0.99, 0.01, 0.4, 0.6, 0.4, 0.6, 0.771, 
0.229, 0.99, 0.01, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.43, 0.57, 
0.46, 0.54, 0.4, 0.6, 0.4, 0.6, 0.99, 0.01), g = c("13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x", "13907   x", 
"13907   x", "13907   x", "13907   x", "13907   x")), row.names = c(NA, 
-190L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    p = c(67500055, 67500055, 77700108, 77700108, 77700133, 77700133, 
    77700135, 77700135, 77700137, 77700137, 77700139, 77700139, 
    104300134, 104300134, 357300053, 357300053, 357300054, 357300054, 
    357300067, 357300067, 357300070, 357300070, 357300072, 357300072, 
    357300078, 357300078, 357300079, 357300079, 357300093, 357300093, 
    574100025, 574100025, 581300127, 581300127, 990200002, 990200002, 
    1032400220, 1032400220, 3481000035, 3481000035, 3481000036, 
    3481000036, 3481000037, 3481000037, 5075700005, 5075700005, 
    6424000064, 6424000064, 6677700001, 6677700001, 6749600001, 
    6749600001, 6761900044, 6761900044, 7027100032, 7027100032, 
    7527700002, 7527700002, 8185700001, 8185700001, 9145200001, 
    9145200001, 9145200005, 9145200005, 9145200006, 9145200006, 
    9270800001, 9270800001, 9533700001, 9533700001), f = c("21801", 
    "x", "21801", "x", "21801", "x", "21801", "x", "21801", "x", 
    "21801", "x", "21801", "x", "21801", "x", "21801", "x", "21801", 
    "x", "21801", "x", "21801", "x", "21801", "x", "21801", "x", 
    "21801", "x", "21801", "x", "21801", "x", "21801", "x", "21801", 
    "x", "21801", "x", "21801", "x", "21801", "x", "21801", "x", 
    "21801", "x", "21801", "x", "21801", "x", "21801", "x", "21801", 
    "x", "21801", "x", "21801", "x", "21801", "x", "21801", "x", 
    "21801", "x", "21801", "x", "21801", "x"), x = c(0.025, 0.975, 
    0.035, 0.965, 0.0263, 0.9737, 0.025, 0.975, 0.0263, 0.9737, 
    0.0278, 0.9722, 0.37, 0.63, 0.045, 0.955, 0.06, 0.94, 0.015, 
    0.985, 0.018, 0.982, 0.045, 0.955, 0.06, 0.94, 0.045, 0.955, 
    0.06, 0.94, 0.08, 0.92, 0.00667, 0.99333, 0.25, 0.75, 0.06, 
    0.94, 0.006, 0.994, 0.006, 0.994, 0.006, 0.994, 0.06, 0.94, 
    0.137, 0.863, 0.94, 0.0600000000000001, 0.0625, 0.9375, 0.003, 
    0.997, 0.06, 0.94, 0.05, 0.95, 0.045, 0.955, 0.25, 0.75, 
    0.002, 0.998, 0.009, 0.991, 0.0066, 0.9934, 0.015, 0.985), 
    g = c("21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x", "21801   x", "21801   x", "21801   x", "21801   x", 
    "21801   x")), row.names = c(NA, -70L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(p = c(1032400234, 1032400234, 
1032400234, 1032400234, 1032400234), f = c("21801", "69149", 
"69165", "69166", "169101"), x = c(0.3, 0.0154, 0.0307, 0.0154, 
0.041), g = c("21801 69149 69165 69166 169101", "21801 69149 69165 69166 169101", 
"21801 69149 69165 69166 169101", "21801 69149 69165 69166 169101", 
"21801 69149 69165 69166 169101")), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(p = c(46400699, 46400699, 
46400700, 46400700, 46400701, 46400701, 46400702, 46400702, 46400712, 
46400712, 46400715, 46400715, 46400716, 46400716, 46408142, 46408142, 
183900249, 183900249, 183900251, 183900251, 183900252, 183900252, 
1032400207, 1032400207, 1032400222, 1032400222, 1032400223, 1032400223, 
1070700067, 1070700067, 5248400005, 5248400005, 7117300007, 7117300007, 
7117300009, 7117300009, 8276000005, 8276000005, 8911000022, 8911000022, 
9051100006, 9051100006, 9051100009, 9051100009, 9092400009, 9092400009, 
9251300001, 9251300001, 9251300002, 9251300002, 9251300003, 9251300003, 
9251300005, 9251300005, 9251300006, 9251300006, 9358500001, 9358500001, 
9460200002, 9460200002, 9460200003, 9460200003), f = c("43901", 
"69105", "43901", "69105", "43901", "69105", "43901", "69105", 
"43901", "69105", "43901", "69105", "43901", "69105", "43901", 
"69105", "43901", "69105", "43901", "69105", "43901", "69105", 
"43901", "69105", "43901", "69105", "43901", "69105", "43901", 
"69105", "43901", "69105", "43901", "69105", "43901", "69105", 
"43901", "69105", "43901", "69105", "43901", "69105", "43901", 
"69105", "43901", "69105", "43901", "69105", "43901", "69105", 
"43901", "69105", "43901", "69105", "43901", "69105", "43901", 
"69105", "43901", "69105", "43901", "69105"), x = c(0.14, 0.025, 
0.14, 0.025, 0.425, 0.075, 0.425, 0.075, 0.425, 0.075, 0.425, 
0.075, 0.14, 0.025, 0.14, 0.025, 0.14, 0.025, 0.05, 0.1, 0.425, 
0.075, 0.4, 0.1, 0.05, 0.1, 0.1, 0.15, 0.14, 0.025, 0.14, 0.03, 
0.14, 0.025, 0.425, 0.075, 0.14, 0.025, 0.14, 0.025, 0.14, 0.025, 
0.05, 0.1, 0.05, 0.1, 0.12, 0.03, 0.25, 0.1, 0.048, 0.02, 0.05, 
0.1, 0.14, 0.025, 0.14, 0.025, 0.08, 0.02, 0.05, 0.1), g = c("43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105", 
"43901 69105")), row.names = c(NA, -62L), class = c("tbl_df", 
"tbl", "data.frame"))), ptype = structure(list(p = numeric(0), 
    f = character(0), x = numeric(0), g = character(0)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = integer(0)), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))
 

Комментарии:

1. Если вы обернете блок кода как функцию, а затем используете lapply(dfl, yourfun) , это не сработает или просто используйте map_dfr(dfl, yourfun)

2. лапли, за которым следует bind_rows?

Ответ №1:

Просто оберните код как функцию и используйте map_dfr , чтобы получить все выходные данные в виде отдельных данных

 library(dplyr)
library(purrr)
library(tidyr)
library(cluster)
library(factoextra)
dfl <- list(datainput, datainput)
map_dfr(dfl, f1, .id = 'id')
# A tibble: 144 x 6
   id             p f           x g          cluster
   <chr>      <dbl> <chr>   <dbl> <chr>        <int>
 1 1      409100012 107403 0.005  107403   x       1
 2 1      409100012 x      0.995  107403   x       1
 3 1     1032400197 107403 0.05   107403   x       2
 4 1     1032400197 x      0.95   107403   x       2
 5 1     3725600001 107403 0.033  107403   x       2
 6 1     3725600001 x      0.967  107403   x       2
 7 1     4218200011 107403 0.036  107403   x       2
 8 1     4218200011 x      0.964  107403   x       2
 9 1     4873700001 107403 0.0512 107403   x       2
10 1     4873700001 x      0.949  107403   x       2
# … with 134 more rows
 

где

 f1 <- function(dat) { 
  df <- pivot_wider( 
    dat,
    id_cols = "p", 
    names_from = "f", 
    values_from = "x"
  )
  
  rows <- df$p
  
  df <- df %>% select(-p) 
  
  df[is.na(df)] <- 0
  
  row.names(df) <- rows
  
  df <- scale(df)
  
  # compute dissimilarity matrix
  d <- dist(df, method = "euclidean")
  
  # store method names in m
  m <- c( "average", "single", "complete", "ward")
  names(m) <- c( "average", "single", "complete", "ward")
  
  # function to compute coefficients
  ac <- function(x) { 
    agnes(d, method = x)$ac
  }
  
  # choose best method by coefficient, store in method
  coeffs <- map_dbl(m, ac) %>% #
    as_tibble %>%
    mutate(method = m) %>%
    filter(value == max(value))
  
  coeffs <- matrix(data = coeffs)
  
  method = coeffs[2,1]
  
  # Function to compute hierarchical clustering using d and method
  hc <- function(x) {
    agnes(d, method = method)
  }
  
  # compute hierarchical clustering with optimal method
  hc1 <- hc(method)
  
  # determine optimal clusters number by slopes of elbow plot
  elbowplot <- fviz_nbclust(df, FUN = hcut, method = "wss")
  
  elbow <- ggplot_build(elbowplot)
  
  elbow <- elbow$data[[1]] %>%
    as_tibble() 
  
  elbow <- elbow %>%
    mutate(slope = if_else(
      elbow$x == min(elbow$x), elbow$y/elbow$x,
      -(elbow$y-lag(elbow$y)/(elbow$x-lag(elbow$x)))
    ))
  
  elbow <- elbow %>%
    mutate(lastslope = if_else(
      x == 1, slope, lag(elbow$slope)
    )) %>%
    mutate(nextslope = if_else(
      elbow$x == max(elbow$x), elbow$slope, lead(elbow$slope)
    )) %>%
    mutate(slopedelta = as.numeric(lastslope - slope)) %>%
    arrange(-slopedelta) %>%
    slice_head() %>%
    select(x)
  
  clusters <- matrix(data = elbow)
  
  clusters = clusters[1,1]
  
  # Cut dendrogram by clusters, store in sub_grp
  sub_grp <- cutree(hc1, k = clusters)
  
  # store cluster value as column called cluster
  df <- df %>%
    as_tibble()
  
  row.names(df) <- rows
  
  df <- df %>%
    rownames_to_column(var = "p") %>%
    mutate(cluster = sub_grp) %>%
    select(p, cluster) %>%
    mutate(p = as.double(p))
  
  dat <-
    left_join(dat, df)
  
  return(dat)
}