#r #loops #iteration #purrr
Вопрос:
ПРИМЕЧАНИЕ: см. Раздел пропускная способность и сценарий в нижнем разделе
Краткое описание проблемы
У меня есть сценарий , который я написал , который берет тиббл datainput
, преобразует его в тиббл df
, выполняет ряд функций / вычислений, построенных из df
объекта, и в конечном итоге присоединяется к исходному datainput
тибблу, df
чтобы создать новый столбец под названием datainput$cluster
Вот как datainput
это выглядит:
> head(datainput)
# A tibble: 6 x 4
p f x g
<dbl> <chr> <dbl> <chr>
1 409100012 107403 0.005 107403 x
2 409100012 x 0.995 107403 x
3 1032400197 107403 0.05 107403 x
4 1032400197 x 0.95 107403 x
5 3725600001 107403 0.033 107403 x
6 3725600001 x 0.967 107403 x
И вот как datainput
это выглядит, после применения приведенного выше сценария:
> head(datainput)
# A tibble: 6 x 5
p f x g cluster
<dbl> <chr> <dbl> <chr> <int>
1 409100012 107403 0.005 107403 x 1
2 409100012 x 0.995 107403 x 1
3 1032400197 107403 0.05 107403 x 2
4 1032400197 x 0.95 107403 x 2
5 3725600001 107403 0.033 107403 x 2
6 3725600001 x 0.967 107403 x 2
Фактическая проблема , с которой я сталкиваюсь, заключается в том, что мне нужно выяснить, как применить этот сценарий не к одному тибблу в качестве datainput
, а к списку тибблов под названием dfl
. Я перепробовал множество вариаций лапли и т. Д., Но мне не везет.
Я думаю, что моя проблема заключается в том, как я сохраняю свой сценарий как функцию.
Может ли кто-нибудь дать какие-либо указания относительно того, как я могу применить свой скрипт к списку тибблов в dfl
объекте, а затем преобразовать dfl
объект в один тиббл с добавлением нового столбца?
dпутс
datainput
:
structure(list(p = c(409100012, 409100012, 1032400197, 1032400197,
3725600001, 3725600001, 4218200011, 4218200011, 4873700001, 4873700001,
5305300007, 5305300007, 6488100007, 6488100007, 7008700002, 7008700002,
7517400002, 7517400002, 8265300001, 8265300001, 8301900001, 8301900001,
8301900002, 8301900002, 8301900003, 8301900003, 8301900005, 8301900005,
8301900006, 8301900006, 8313500001, 8313500001, 8534800002, 8534800002,
8555600001, 8555600001, 8555600002, 8555600002, 8620000001, 8620000001,
8620000002, 8620000002, 8758300003, 8758300003, 8790700001, 8790700001,
8790700002, 8790700002, 8896500001, 8896500001, 8916000002, 8916000002,
8916000004, 8916000004, 9085600001, 9085600001, 9085600002, 9085600002,
9085600003, 9085600003, 9179900001, 9179900001, 9208200001, 9208200001,
9441800001, 9441800001, 9565600001, 9565600001, 9565600002, 9565600002,
9754300001, 9754300001), f = c("107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x"), x = c(0.005, 0.995, 0.05,
0.95, 0.033, 0.967, 0.036, 0.964, 0.0512, 0.9488, 0.0075, 0.9925,
0.036, 0.964, 0.001, 0.999, 0.05, 0.95, 0.0074, 0.9926, 0.84,
0.16, 0.0075, 0.9925, 0.05, 0.95, 0.05, 0.95, 0.0075, 0.9925,
0.0144, 0.9856, 0.033, 0.967, 0.05, 0.95, 0.0075, 0.9925, 0.0084,
0.9916, 0.036, 0.964, 0.005, 0.995, 0.036, 0.964, 0.05, 0.95,
0.0005, 0.9995, 0.036, 0.964, 0.02, 0.98, 0.036, 0.964, 0.013,
0.987, 0.005, 0.995, 0.036, 0.964, 0.0075, 0.9925, 0.01, 0.99,
0.005, 0.995, 0.05, 0.95, 0.005, 0.995), g = c("107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x")), row.names = c(NA, -72L), class = c("tbl_df",
"tbl", "data.frame"))
script
:
# transform data
df <-
pivot_wider(
datainput,
id_cols = "p",
names_from = "f",
values_from = "x"
)
rows <- df$p
df <- df %>% select(-p)
df[is.na(df)] <- 0
row.names(df) <- rows
df <- scale(df)
# compute dissimilarity matrix
d <- dist(df, method = "euclidean")
# store method names in m
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
# function to compute coefficients
ac <- function(x) {
agnes(d, method = x)$ac
}
# choose best method by coefficient, store in method
coeffs <- map_dbl(m, ac) %>% #
as_tibble %>%
mutate(method = m) %>%
filter(value == max(value))
coeffs <- matrix(data = coeffs)
method = coeffs[2,1]
# Function to compute hierarchical clustering using d and method
hc <- function(x) {
agnes(d, method = method)
}
# compute hierarchical clustering with optimal method
hc1 <- hc(method)
# determine optimal clusters number by slopes of elbow plot
elbowplot <- fviz_nbclust(df, FUN = hcut, method = "wss")
elbow <- ggplot_build(elbowplot)
elbow <- elbow$data[[1]] %>%
as_tibble()
elbow <- elbow %>%
mutate(slope = if_else(
elbow$x == min(elbow$x), elbow$y/elbow$x,
-(elbow$y-lag(elbow$y)/(elbow$x-lag(elbow$x)))
))
elbow <- elbow %>%
mutate(lastslope = if_else(
x == 1, slope, lag(elbow$slope)
)) %>%
mutate(nextslope = if_else(
elbow$x == max(elbow$x), elbow$slope, lead(elbow$slope)
)) %>%
mutate(slopedelta = as.numeric(lastslope - slope)) %>%
arrange(-slopedelta) %>%
slice_head() %>%
select(x)
clusters <- matrix(data = elbow)
clusters = clusters[1,1]
# Cut dendrogram by clusters, store in sub_grp
sub_grp <- cutree(hc1, k = clusters)
# store cluster value as column called cluster
df <- df %>%
as_tibble()
row.names(df) <- rows
df <- df %>%
rownames_to_column(var = "p") %>%
mutate(cluster = sub_grp) %>%
select(p, cluster) %>%
mutate(p = as.double(p))
datainput <-
left_join(datainput, df)
# rm unneccesary things
rm(clusters, coeffs, elbow, elbowplot, hc1, method, d, m, rows, sub_grp, ac, hc, df)
dfl
:
structure(list(structure(list(p = c(409100012, 409100012, 1032400197,
1032400197, 3725600001, 3725600001, 4218200011, 4218200011, 4873700001,
4873700001, 5305300007, 5305300007, 6488100007, 6488100007, 7008700002,
7008700002, 7517400002, 7517400002, 8265300001, 8265300001, 8301900001,
8301900001, 8301900002, 8301900002, 8301900003, 8301900003, 8301900005,
8301900005, 8301900006, 8301900006, 8313500001, 8313500001, 8534800002,
8534800002, 8555600001, 8555600001, 8555600002, 8555600002, 8620000001,
8620000001, 8620000002, 8620000002, 8758300003, 8758300003, 8790700001,
8790700001, 8790700002, 8790700002, 8896500001, 8896500001, 8916000002,
8916000002, 8916000004, 8916000004, 9085600001, 9085600001, 9085600002,
9085600002, 9085600003, 9085600003, 9179900001, 9179900001, 9208200001,
9208200001, 9441800001, 9441800001, 9565600001, 9565600001, 9565600002,
9565600002, 9754300001, 9754300001), f = c("107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x", "107403", "x", "107403",
"x", "107403", "x", "107403", "x", "107403", "x", "107403", "x",
"107403", "x", "107403", "x", "107403", "x"), x = c(0.005, 0.995,
0.05, 0.95, 0.033, 0.967, 0.036, 0.964, 0.0512, 0.9488, 0.0075,
0.9925, 0.036, 0.964, 0.001, 0.999, 0.05, 0.95, 0.0074, 0.9926,
0.84, 0.16, 0.0075, 0.9925, 0.05, 0.95, 0.05, 0.95, 0.0075, 0.9925,
0.0144, 0.9856, 0.033, 0.967, 0.05, 0.95, 0.0075, 0.9925, 0.0084,
0.9916, 0.036, 0.964, 0.005, 0.995, 0.036, 0.964, 0.05, 0.95,
0.0005, 0.9995, 0.036, 0.964, 0.02, 0.98, 0.036, 0.964, 0.013,
0.987, 0.005, 0.995, 0.036, 0.964, 0.0075, 0.9925, 0.01, 0.99,
0.005, 0.995, 0.05, 0.95, 0.005, 0.995), g = c("107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x", "107403 x", "107403 x", "107403 x", "107403 x",
"107403 x")), row.names = c(NA, -72L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(p = c(50700005, 50700005,
145900103, 145900103, 183900065, 183900065, 214400008, 214400008,
546400001, 546400001, 683600191, 683600191, 1032400049, 1032400049,
7295600001, 7295600001), f = c("128928", "x", "128928", "x",
"128928", "x", "128928", "x", "128928", "x", "128928", "x", "128928",
"x", "128928", "x"), x = c(0.4, 0.6, 0.0285, 0.9715, 0.5, 0.5,
0.1, 0.9, 0.129, 0.871, 0.5, 0.5, 0.5, 0.5, 0.000175, 0.999825
), g = c("128928 x", "128928 x", "128928 x", "128928 x",
"128928 x", "128928 x", "128928 x", "128928 x", "128928 x",
"128928 x", "128928 x", "128928 x", "128928 x", "128928 x",
"128928 x", "128928 x")), row.names = c(NA, -16L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(p = c(125801401, 125801401,
144800345, 144800345, 170600168, 170600168, 170600181, 170600181,
170600217, 170600217, 170600235, 170600235, 221400012, 221400012,
221400013, 221400013, 221400014, 221400014, 221400015, 221400015,
337700025, 337700025, 337700028, 337700028, 337700029, 337700029,
337700032, 337700032, 337700034, 337700034, 337700053, 337700053,
337700054, 337700054, 337700073, 337700073, 337700075, 337700075,
337700076, 337700076, 337700077, 337700077, 343200058, 343200058,
343200090, 343200090, 352500127, 352500127, 387600158, 387600158,
387600159, 387600159, 518500447, 518500447, 518500448, 518500448,
518500449, 518500449, 518500450, 518500450, 518500451, 518500451,
518500466, 518500466, 518500467, 518500467, 573600090, 573600090,
573600094, 573600094, 578500066, 578500066, 578500067, 578500067,
578500076, 578500076, 578500078, 578500078, 578500079, 578500079,
578500080, 578500080, 578500081, 578500081, 736400030, 736400030,
736400104, 736400104, 736400106, 736400106, 736400107, 736400107,
761600065, 761600065, 862200045, 862200045, 862200049, 862200049,
862200051, 862200051, 862200057, 862200057, 862200066, 862200066,
862200067, 862200067, 862200078, 862200078, 862200089, 862200089,
862200091, 862200091, 895900052, 895900052, 1032400095, 1032400095,
1530000026, 1530000026, 4126000041, 4126000041, 4154700013, 4154700013,
4229100003, 4229100003, 4530900043, 4530900043, 4533700006, 4533700006,
4533700007, 4533700007, 4533700008, 4533700008, 4533700009, 4533700009,
4533700010, 4533700010, 4533700011, 4533700011, 4533700014, 4533700014,
4533700015, 4533700015, 4533700016, 4533700016, 4604300027, 4604300027,
4604300028, 4604300028, 4604300029, 4604300029, 5499800009, 5499800009,
5861600003, 5861600003, 5861600005, 5861600005, 5861600006, 5861600006,
6248100001, 6248100001, 6383800026, 6383800026, 6947000031, 6947000031,
6968100036, 6968100036, 6968100042, 6968100042, 7170400001, 7170400001,
7177000005, 7177000005, 7357800001, 7357800001, 7465500019, 7465500019,
7465500029, 7465500029, 8345100017, 8345100017, 8345100018, 8345100018,
8345100019, 8345100019, 8871400003, 8871400003, 8911000035, 8911000035,
9005200001, 9005200001), f = c("13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x", "13907", "x", "13907", "x",
"13907", "x", "13907", "x", "13907", "x", "13907", "x", "13907",
"x", "13907", "x", "13907", "x"), x = c(0.98, 0.02, 0.4, 0.6,
0.428, 0.572, 0.428, 0.572, 0.4, 0.6, 0.3, 0.7, 0.453, 0.547,
0.4, 0.6, 0.38, 0.62, 0.43, 0.57, 0.4, 0.6, 0.45, 0.55, 0.45,
0.55, 0.4, 0.6, 0.98, 0.02, 0.98, 0.02, 0.4, 0.6, 0.43, 0.57,
0.1, 0.9, 0.5, 0.5, 0.98, 0.02, 0.35, 0.65, 0.99, 0.01, 0.3218,
0.6782, 0.4, 0.6, 0.4, 0.6, 0.97, 0.03, 0.97, 0.03, 0.46, 0.54,
0.46, 0.54, 0.4, 0.6, 0.38, 0.62, 0.43, 0.57, 0.026, 0.974, 0.017,
0.983, 0.46, 0.54, 0.46, 0.54, 0.38, 0.62, 0.97, 0.03, 0.428,
0.572, 0.15, 0.85, 0.4, 0.6, 0.3218, 0.6782, 0.98, 0.02, 0.98,
0.02, 0.98, 0.02, 0.038, 0.962, 0.99, 0.01, 0.4, 0.6, 0.99, 0.01,
0.99, 0.01, 0.45, 0.55, 0.43, 0.57, 0.99, 0.01, 0.46, 0.54, 0.45,
0.55, 0.98, 0.02, 0.98, 0.02, 0.4, 0.6, 0.312, 0.688, 0.99, 0.01,
0.3218, 0.6782, 0.35, 0.65, 0.223, 0.777, 0.208, 0.792, 0.888,
0.112, 0.485, 0.515, 0.104, 0.896, 0.414, 0.586, 0.676, 0.324,
0.333, 0.667, 0.6899, 0.3101, 0.99, 0.01, 0.99, 0.01, 0.4, 0.6,
0.35, 0.65, 0.223, 0.777, 0.468, 0.532, 0.149, 0.851, 0.99, 0.01,
0.4, 0.6, 0.99, 0.01, 0.99, 0.01, 0.4, 0.6, 0.4, 0.6, 0.771,
0.229, 0.99, 0.01, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.43, 0.57,
0.46, 0.54, 0.4, 0.6, 0.4, 0.6, 0.99, 0.01), g = c("13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x", "13907 x",
"13907 x", "13907 x", "13907 x", "13907 x")), row.names = c(NA,
-190L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
p = c(67500055, 67500055, 77700108, 77700108, 77700133, 77700133,
77700135, 77700135, 77700137, 77700137, 77700139, 77700139,
104300134, 104300134, 357300053, 357300053, 357300054, 357300054,
357300067, 357300067, 357300070, 357300070, 357300072, 357300072,
357300078, 357300078, 357300079, 357300079, 357300093, 357300093,
574100025, 574100025, 581300127, 581300127, 990200002, 990200002,
1032400220, 1032400220, 3481000035, 3481000035, 3481000036,
3481000036, 3481000037, 3481000037, 5075700005, 5075700005,
6424000064, 6424000064, 6677700001, 6677700001, 6749600001,
6749600001, 6761900044, 6761900044, 7027100032, 7027100032,
7527700002, 7527700002, 8185700001, 8185700001, 9145200001,
9145200001, 9145200005, 9145200005, 9145200006, 9145200006,
9270800001, 9270800001, 9533700001, 9533700001), f = c("21801",
"x", "21801", "x", "21801", "x", "21801", "x", "21801", "x",
"21801", "x", "21801", "x", "21801", "x", "21801", "x", "21801",
"x", "21801", "x", "21801", "x", "21801", "x", "21801", "x",
"21801", "x", "21801", "x", "21801", "x", "21801", "x", "21801",
"x", "21801", "x", "21801", "x", "21801", "x", "21801", "x",
"21801", "x", "21801", "x", "21801", "x", "21801", "x", "21801",
"x", "21801", "x", "21801", "x", "21801", "x", "21801", "x",
"21801", "x", "21801", "x", "21801", "x"), x = c(0.025, 0.975,
0.035, 0.965, 0.0263, 0.9737, 0.025, 0.975, 0.0263, 0.9737,
0.0278, 0.9722, 0.37, 0.63, 0.045, 0.955, 0.06, 0.94, 0.015,
0.985, 0.018, 0.982, 0.045, 0.955, 0.06, 0.94, 0.045, 0.955,
0.06, 0.94, 0.08, 0.92, 0.00667, 0.99333, 0.25, 0.75, 0.06,
0.94, 0.006, 0.994, 0.006, 0.994, 0.006, 0.994, 0.06, 0.94,
0.137, 0.863, 0.94, 0.0600000000000001, 0.0625, 0.9375, 0.003,
0.997, 0.06, 0.94, 0.05, 0.95, 0.045, 0.955, 0.25, 0.75,
0.002, 0.998, 0.009, 0.991, 0.0066, 0.9934, 0.015, 0.985),
g = c("21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x", "21801 x", "21801 x", "21801 x", "21801 x",
"21801 x")), row.names = c(NA, -70L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(p = c(1032400234, 1032400234,
1032400234, 1032400234, 1032400234), f = c("21801", "69149",
"69165", "69166", "169101"), x = c(0.3, 0.0154, 0.0307, 0.0154,
0.041), g = c("21801 69149 69165 69166 169101", "21801 69149 69165 69166 169101",
"21801 69149 69165 69166 169101", "21801 69149 69165 69166 169101",
"21801 69149 69165 69166 169101")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(p = c(46400699, 46400699,
46400700, 46400700, 46400701, 46400701, 46400702, 46400702, 46400712,
46400712, 46400715, 46400715, 46400716, 46400716, 46408142, 46408142,
183900249, 183900249, 183900251, 183900251, 183900252, 183900252,
1032400207, 1032400207, 1032400222, 1032400222, 1032400223, 1032400223,
1070700067, 1070700067, 5248400005, 5248400005, 7117300007, 7117300007,
7117300009, 7117300009, 8276000005, 8276000005, 8911000022, 8911000022,
9051100006, 9051100006, 9051100009, 9051100009, 9092400009, 9092400009,
9251300001, 9251300001, 9251300002, 9251300002, 9251300003, 9251300003,
9251300005, 9251300005, 9251300006, 9251300006, 9358500001, 9358500001,
9460200002, 9460200002, 9460200003, 9460200003), f = c("43901",
"69105", "43901", "69105", "43901", "69105", "43901", "69105",
"43901", "69105", "43901", "69105", "43901", "69105", "43901",
"69105", "43901", "69105", "43901", "69105", "43901", "69105",
"43901", "69105", "43901", "69105", "43901", "69105", "43901",
"69105", "43901", "69105", "43901", "69105", "43901", "69105",
"43901", "69105", "43901", "69105", "43901", "69105", "43901",
"69105", "43901", "69105", "43901", "69105", "43901", "69105",
"43901", "69105", "43901", "69105", "43901", "69105", "43901",
"69105", "43901", "69105", "43901", "69105"), x = c(0.14, 0.025,
0.14, 0.025, 0.425, 0.075, 0.425, 0.075, 0.425, 0.075, 0.425,
0.075, 0.14, 0.025, 0.14, 0.025, 0.14, 0.025, 0.05, 0.1, 0.425,
0.075, 0.4, 0.1, 0.05, 0.1, 0.1, 0.15, 0.14, 0.025, 0.14, 0.03,
0.14, 0.025, 0.425, 0.075, 0.14, 0.025, 0.14, 0.025, 0.14, 0.025,
0.05, 0.1, 0.05, 0.1, 0.12, 0.03, 0.25, 0.1, 0.048, 0.02, 0.05,
0.1, 0.14, 0.025, 0.14, 0.025, 0.08, 0.02, 0.05, 0.1), g = c("43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105", "43901 69105", "43901 69105", "43901 69105", "43901 69105",
"43901 69105")), row.names = c(NA, -62L), class = c("tbl_df",
"tbl", "data.frame"))), ptype = structure(list(p = numeric(0),
f = character(0), x = numeric(0), g = character(0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = integer(0)), class = c("vctrs_list_of",
"vctrs_vctr", "list"))
Комментарии:
1. Если вы обернете блок кода как функцию, а затем используете
lapply(dfl, yourfun)
, это не сработает или просто используйтеmap_dfr(dfl, yourfun)
2. лапли, за которым следует bind_rows?
Ответ №1:
Просто оберните код как функцию и используйте map_dfr
, чтобы получить все выходные данные в виде отдельных данных
library(dplyr)
library(purrr)
library(tidyr)
library(cluster)
library(factoextra)
dfl <- list(datainput, datainput)
map_dfr(dfl, f1, .id = 'id')
# A tibble: 144 x 6
id p f x g cluster
<chr> <dbl> <chr> <dbl> <chr> <int>
1 1 409100012 107403 0.005 107403 x 1
2 1 409100012 x 0.995 107403 x 1
3 1 1032400197 107403 0.05 107403 x 2
4 1 1032400197 x 0.95 107403 x 2
5 1 3725600001 107403 0.033 107403 x 2
6 1 3725600001 x 0.967 107403 x 2
7 1 4218200011 107403 0.036 107403 x 2
8 1 4218200011 x 0.964 107403 x 2
9 1 4873700001 107403 0.0512 107403 x 2
10 1 4873700001 x 0.949 107403 x 2
# … with 134 more rows
где
f1 <- function(dat) {
df <- pivot_wider(
dat,
id_cols = "p",
names_from = "f",
values_from = "x"
)
rows <- df$p
df <- df %>% select(-p)
df[is.na(df)] <- 0
row.names(df) <- rows
df <- scale(df)
# compute dissimilarity matrix
d <- dist(df, method = "euclidean")
# store method names in m
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
# function to compute coefficients
ac <- function(x) {
agnes(d, method = x)$ac
}
# choose best method by coefficient, store in method
coeffs <- map_dbl(m, ac) %>% #
as_tibble %>%
mutate(method = m) %>%
filter(value == max(value))
coeffs <- matrix(data = coeffs)
method = coeffs[2,1]
# Function to compute hierarchical clustering using d and method
hc <- function(x) {
agnes(d, method = method)
}
# compute hierarchical clustering with optimal method
hc1 <- hc(method)
# determine optimal clusters number by slopes of elbow plot
elbowplot <- fviz_nbclust(df, FUN = hcut, method = "wss")
elbow <- ggplot_build(elbowplot)
elbow <- elbow$data[[1]] %>%
as_tibble()
elbow <- elbow %>%
mutate(slope = if_else(
elbow$x == min(elbow$x), elbow$y/elbow$x,
-(elbow$y-lag(elbow$y)/(elbow$x-lag(elbow$x)))
))
elbow <- elbow %>%
mutate(lastslope = if_else(
x == 1, slope, lag(elbow$slope)
)) %>%
mutate(nextslope = if_else(
elbow$x == max(elbow$x), elbow$slope, lead(elbow$slope)
)) %>%
mutate(slopedelta = as.numeric(lastslope - slope)) %>%
arrange(-slopedelta) %>%
slice_head() %>%
select(x)
clusters <- matrix(data = elbow)
clusters = clusters[1,1]
# Cut dendrogram by clusters, store in sub_grp
sub_grp <- cutree(hc1, k = clusters)
# store cluster value as column called cluster
df <- df %>%
as_tibble()
row.names(df) <- rows
df <- df %>%
rownames_to_column(var = "p") %>%
mutate(cluster = sub_grp) %>%
select(p, cluster) %>%
mutate(p = as.double(p))
dat <-
left_join(dat, df)
return(dat)
}