#r #cluster-analysis #distance
#r #кластерный анализ #расстояние
Вопрос:
У меня есть набор данных координат по кластерам, и я хочу иметь возможность вычислять оптимальный путь внутри каждого кластера. Меня не волнует порядок кластеров каким-либо образом (так что это не то же самое, что проблема коммивояжера с несколькими продавцами), меня интересует только путь внутри каждого кластера, являющийся наиболее эффективным путем. Вот некоторые фиктивные данные, с которыми я работаю:
text <- ("block_id y_cent x_cent
1 0 86.32583 21.23006
2 0 86.34592 21.26965
3 0 86.32777 21.28417
4 0 86.35122 21.25605
5 0 86.32938 21.22385
6 1 86.20016 21.21201
7 1 86.20832 21.19509
8 1 86.30536 21.22023
9 1 86.28319 21.24836
10 1 86.27600 21.20099
11 2 86.30879 21.09177
12 2 86.26537 21.12278
13 2 86.27436 21.10759
14 2 86.29274 21.12408
15 2 86.28498 21.08070
16 3 86.20485 21.16394
17 3 86.24649 21.12542
18 3 86.20451 21.18954
19 3 86.24388 21.17954
20 3 86.28371 21.18983
21 4 86.26007 21.18481
22 4 86.26417 21.15848
23 4 86.24346 21.18187
24 4 86.23786 21.14750
25 4 86.24028 21.17758
26 5 85.83458 21.09636
27 5 85.88187 21.19782
28 5 86.25454 21.10205
29 5 86.21222 21.18343
30 5 86.22216 21.15054
31 6 85.65182 21.25936
32 6 85.59950 21.10918
33 6 85.32598 21.08253
34 6 85.82243 21.08069
35 6 85.55521 21.18281
36 7 85.78904 21.25499
37 7 85.74958 21.26402
38 7 85.91549 21.27784
39 7 85.87116 21.21600
40 7 85.81141 21.29736
41 8 85.69824 21.27207
42 8 85.79566 21.30878
43 8 85.82959 21.31363
44 8 85.77804 21.34154
45 8 85.76139 21.29419
46 9 85.67412 21.29211
47 9 85.78540 21.34394
48 9 85.72229 21.34350
49 9 85.75251 21.35174
50 9 85.72273 21.33228
51 10 85.69474 21.39794
52 10 85.63031 21.38825
53 10 85.59537 21.39553
54 10 85.65995 21.34239
55 10 85.68217 21.33520
56 11 85.38762 21.40342
57 11 85.36199 21.47532
58 11 85.37264 21.45773
59 11 85.41091 21.37915
60 11 85.44151 21.45487
61 12 85.48393 21.66811
62 12 85.58078 21.66112
63 12 85.54180 21.68870
64 12 85.62086 21.67811
65 12 85.59801 21.69405
66 13 85.62965 21.74519
67 13 85.59164 21.71804
68 13 85.59901 21.73280
69 13 85.60186 21.73106
70 13 85.58615 21.72894
71 14 85.42223 21.74209
72 14 85.43497 21.73899
73 14 85.40001 21.78543
74 14 85.49442 21.75747
75 14 85.56546 21.73744
76 15 85.38635 21.63870
77 15 85.43960 21.65576
78 15 85.35741 21.73981
79 15 85.31951 21.68389
80 15 85.31521 21.69784
81 16 85.49374 21.64992
82 16 85.65722 21.63587
83 16 85.72079 21.64818
84 16 85.63256 21.63490
85 16 85.65646 21.62013
86 17 85.50028 21.52405
87 17 85.41230 21.61336
88 17 85.66391 21.53833
89 17 85.44119 21.58837
90 17 85.57245 21.54461
91 18 85.40519 21.56703
92 18 85.42931 21.62131
93 18 85.32510 21.63665
94 18 85.36225 21.51129
95 18 85.37187 21.51874
96 19 85.75957 21.50509
97 19 86.01517 21.55289
98 19 85.86172 21.54278
99 19 85.80931 21.62484
100 19 85.72187 21.60939
101 20 85.72597 21.87745
102 20 85.61561 21.85956
103 20 85.63301 21.85524
104 20 85.57621 21.90503
105 20 85.59059 21.89072
106 21 85.77667 21.85104
107 21 85.76700 21.89142
108 21 85.69538 21.83426
109 21 85.72023 21.82641
110 21 85.63628 21.80932
111 22 85.68025 21.82371
112 22 85.65279 21.80758
113 22 85.68691 21.81113
114 22 85.62421 21.76988
115 22 85.63872 21.76241
116 23 85.60844 21.83434
117 23 85.59517 21.77207
118 23 85.55184 21.77496
119 23 85.56366 21.78844
120 23 85.59948 21.80092
121 24 85.56240 21.83107
122 24 85.56282 21.83471
123 24 85.39359 21.83519
124 24 85.55357 21.80139
125 24 85.51735 21.77419
126 25 85.58370 21.85120
127 25 85.61084 21.87096
128 25 85.52559 21.85486
129 25 85.53922 21.91427
130 25 85.45412 21.88206
131 26 85.67089 21.93965
132 26 85.70483 21.93289
133 26 85.57407 21.95770
134 26 85.68766 21.91724
135 26 85.61921 21.94312
136 27 85.66283 21.97410
137 27 85.63839 21.99442
138 27 85.71954 21.96943
139 27 85.68017 21.99348
140 27 85.63894 21.98225
141 28 84.97737 21.93042
142 28 84.63300 21.98902
143 28 84.88830 21.88623
144 28 84.92210 21.90147
145 28 84.92228 21.90512
146 29 84.99754 21.96503
147 29 85.10119 21.97525
148 29 85.01123 22.00321
149 29 85.06754 22.00531
150 29 84.97547 21.92426
151 30 85.18716 21.86380
152 30 85.17287 21.97213
153 30 85.25031 21.84228
154 30 85.17082 21.88729
155 30 84.95938 21.88108
156 31 85.16441 21.75878
157 31 85.04450 21.76113
158 31 85.05902 21.82381
159 31 85.01908 21.73900
160 31 85.00035 21.78922
161 32 84.97914 21.84518
162 32 85.02973 21.81753
163 32 84.90811 21.84523
164 32 85.06993 21.86186
165 32 85.00793 21.83738
166 33 85.31744 22.08676
167 33 85.36343 22.05680
168 33 85.38304 22.10104
169 33 85.38918 22.10781
170 33 85.36145 22.05842
171 34 85.65591 22.01718
172 34 85.59756 22.04280
173 34 85.59563 21.99338
174 34 85.61070 22.04790
175 34 85.59399 22.07905
176 35 85.55172 22.02003
177 35 85.52242 22.06459
178 35 85.37774 22.04271
179 35 85.47693 22.07631
180 35 85.54970 21.99843
181 36 85.58273 21.99266
182 36 85.56933 21.96981
183 36 85.54938 21.95204
184 36 85.54460 21.97576
185 36 85.50312 21.98531
186 37 85.42173 21.98364
187 37 85.36873 21.99288
188 37 85.43321 21.97371
189 37 85.42685 21.94900
190 37 85.40407 22.00062
191 38 85.31743 21.96311
192 38 85.42195 21.92883
193 38 85.30920 21.98116
194 38 85.23696 21.92306
195 38 85.26164 21.92340
196 39 84.79890 22.28108
197 39 84.73370 22.27024
198 39 84.65563 22.05850
199 39 84.66156 22.14506
200 39 84.70807 22.17602
201 40 84.84906 22.27504
202 40 85.17966 22.13878
203 40 84.84859 22.06098
204 40 84.79058 22.10862
205 40 84.90738 22.07956
206 41 84.68856 22.28907
207 41 84.82779 22.30070
208 41 84.78039 22.30228
209 41 84.71282 22.29906
210 41 84.76225 22.33086
211 42 84.55853 22.37071
212 42 84.51923 22.32529
213 42 84.62699 22.36851
214 42 84.66651 22.37086
215 42 84.49291 22.34431
216 43 84.68952 22.37099
217 43 84.72575 22.35902
218 43 84.70820 22.35887
219 43 84.64178 22.37120
220 43 84.67436 22.36713
221 44 84.69428 22.33692
222 44 84.73014 22.31905
223 44 84.67779 22.23046
224 44 84.60429 22.29762
225 44 84.65155 22.25575
226 45 84.40603 22.23597
227 45 84.49529 22.24281
228 45 84.58443 22.26809
229 45 84.59409 22.23394
230 45 84.59829 22.25254
231 46 84.39357 22.29561
232 46 84.44703 22.30280
233 46 84.46925 22.27578
234 46 84.41866 22.25660
235 46 84.52129 22.25205
236 47 84.30487 22.17233
237 47 84.45558 22.23191
238 47 84.37898 22.16621
239 47 84.43046 22.18487
240 47 84.45154 22.18526
241 48 84.51769 22.09600
242 48 84.55436 22.14137
243 48 84.57622 22.16390
244 48 84.51546 22.14937
245 48 84.63149 22.14019
246 49 84.45983 22.23095
247 49 84.62353 22.20400
248 49 84.56912 22.21290
249 49 84.54201 22.20328
250 49 84.54809 22.17835
251 50 84.00676 21.97773
252 50 83.66445 21.87512
253 50 83.84650 21.94241
254 50 83.80769 21.89265
255 50 84.15527 22.08124
256 51 83.70758 21.87988
257 51 83.74622 21.97113
258 51 83.74746 21.94747
259 51 83.63556 21.97660
260 51 83.79836 21.96437
261 52 83.74530 22.00021
262 52 83.79702 22.03037
263 52 83.68398 22.05825
264 52 83.82263 22.00627
265 52 83.71190 22.02140
266 53 83.62893 22.04063
267 53 83.62875 22.10658
268 53 83.64160 21.98653
269 53 83.71510 22.08841
270 53 83.63223 22.02763
271 54 84.46019 21.99294
272 54 84.37296 21.74227
273 54 84.51199 22.06852
274 54 84.58428 22.05692")
df <- read.table(text = text, header = T, fill = T)
Ответ №1:
Быстрый способ решения этой проблемы с помощью пакета TSP в R:
Группируйте данные по переменной блока / кластера:
df <- dplyr::group_by(df, block_id)
Разделите фрейм данных на список на основе вашего block_id
df_by_block <- dplyr::group_split(df)
Инициировать пустой список
all_block_paths_list <- list()
for(i in 1:length(df_by_block)){
# calculate optimum path
tsp <- TSP::TSP(dist(df_by_block[[i]][,c("x_cent", "y_cent")]))
tsp <- TSP::insert_dummy(tsp, label = "cut")
initial_tour <- TSP::solve_TSP(tsp)
tour <- TSP::solve_TSP(tsp, model = "two_opt", control = list(tour = initial_tour))
path.tsp <- unname(TSP::cut_tour(tour, "cut"))
# Save new datasets in list
all_block_paths_list[[i]] <- df_by_block[[i]][path.tsp,]
}
Теперь свяжите список фреймов данных в один
all_block_paths_df <- dplyr::bind_rows(all_block_paths_list)
Визуализируйте:
library(ggplot2)
ggplot()
geom_point(all_block_paths_df, mapping = aes(x = x_cent, y = y_cent, color = factor(block_id)), size = 2)
geom_path(all_block_paths_df, mapping = aes(x = x_cent, y = y_cent, color = factor(block_id)))
labs(title = "Optimum path distance within clusters",
x = "",
y = "")
theme_bw()
theme(title = element_text(size = 16),
legend.position = "none")