#r #dataframe
#r #фрейм данных
Вопрос:
Я действительно новичок в R, и это, вероятно, действительно основной вопрос. Пожалуйста, посмотрите мой пример кода. Я хотел бы представить процент людей, выполняющих еженедельную работу в течение 24-часового периода времени. Как я могу изменить ось y на проценты вместо итогов?
Я пробовал этот код, но я не уверен в нем:
df2 <- df3 %>%
group_by(day,time) %>%
summarise(Total=sum(value))
df2$Pct <- df2$Total/ sum(df2$Total)
df2<-structure(list(`Day of the week` = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Monday",
"Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
), class = "factor"), time = c(4, 4.25, 4.5, 4.75, 5, 5.25, 5.5,
5.75, 6, 6.25, 6.5, 6.75, 7, 7.25, 7.5, 7.75, 8, 8.25, 8.5, 8.75,
9, 9.25, 9.5, 9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5,
11.75, 12, 12.25, 12.5, 12.75, 13, 13.25, 13.5, 13.75, 14, 14.25,
14.5, 14.75, 15, 15.25, 15.5, 15.75, 16, 16.25, 16.5, 16.75,
17, 17.25, 17.5, 17.75, 18, 18.25, 18.5, 18.75, 19, 19.25, 19.5,
19.75, 20, 20.25, 20.5, 20.75, 21, 21.25, 21.5, 21.75, 22, 22.25,
22.5, 22.75, 23, 23.25, 23.5, 23.75, 24, 24.25, 24.5, 24.75,
25, 25.25, 25.5, 25.75, 26, 26.25, 26.5, 26.75, 27, 27.25, 27.5,
27.75, 4, 4.25, 4.5, 4.75, 5, 5.25, 5.5, 5.75, 6, 6.25, 6.5,
6.75, 7, 7.25, 7.5, 7.75, 8, 8.25, 8.5, 8.75, 9, 9.25, 9.5, 9.75,
10, 10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75, 12, 12.25, 12.5,
12.75, 13, 13.25, 13.5, 13.75, 14, 14.25, 14.5, 14.75, 15, 15.25,
15.5, 15.75, 16, 16.25, 16.5, 16.75, 17, 17.25, 17.5, 17.75,
18, 18.25, 18.5, 18.75, 19, 19.25, 19.5, 19.75, 20, 20.25, 20.5,
20.75, 21, 21.25, 21.5, 21.75, 22, 22.25, 22.5, 22.75, 23, 23.25,
23.5, 23.75, 24, 24.25, 24.5, 24.75, 25, 25.25, 25.5, 25.75,
26, 26.25, 26.5, 26.75, 27, 27.25, 27.5, 27.75, 4, 4.25, 4.5,
4.75, 5, 5.25, 5.5, 5.75, 6, 6.25, 6.5, 6.75, 7, 7.25, 7.5, 7.75,
8, 8.25, 8.5, 8.75, 9, 9.25, 9.5, 9.75, 10, 10.25, 10.5, 10.75,
11, 11.25, 11.5, 11.75, 12, 12.25, 12.5, 12.75, 13, 13.25, 13.5,
13.75, 14, 14.25, 14.5, 14.75, 15, 15.25, 15.5, 15.75, 16, 16.25,
16.5, 16.75, 17, 17.25, 17.5, 17.75, 18, 18.25, 18.5, 18.75,
19, 19.25, 19.5, 19.75, 20, 20.25, 20.5, 20.75, 21, 21.25, 21.5,
21.75, 22, 22.25, 22.5, 22.75, 23, 23.25, 23.5, 23.75, 24, 24.25,
24.5, 24.75, 25, 25.25, 25.5, 25.75, 26, 26.25, 26.5, 26.75,
27, 27.25, 27.5, 27.75, 4, 4.25, 4.5, 4.75, 5, 5.25, 5.5, 5.75,
6, 6.25, 6.5, 6.75, 7, 7.25, 7.5, 7.75, 8, 8.25, 8.5, 8.75, 9,
9.25, 9.5, 9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75,
12, 12.25, 12.5, 12.75, 13, 13.25, 13.5, 13.75, 14, 14.25, 14.5,
14.75, 15, 15.25, 15.5, 15.75, 16, 16.25, 16.5, 16.75, 17, 17.25,
17.5, 17.75, 18, 18.25, 18.5, 18.75, 19, 19.25, 19.5, 19.75,
20, 20.25, 20.5, 20.75, 21, 21.25, 21.5, 21.75, 22, 22.25, 22.5,
22.75, 23, 23.25, 23.5, 23.75, 24, 24.25, 24.5, 24.75, 25, 25.25,
25.5, 25.75, 26, 26.25, 26.5, 26.75, 27, 27.25, 27.5, 27.75,
4, 4.25, 4.5, 4.75, 5, 5.25, 5.5, 5.75, 6, 6.25, 6.5, 6.75, 7,
7.25, 7.5, 7.75, 8, 8.25, 8.5, 8.75, 9, 9.25, 9.5, 9.75, 10,
10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75, 12, 12.25, 12.5,
12.75, 13, 13.25, 13.5, 13.75, 14, 14.25, 14.5, 14.75, 15, 15.25,
15.5, 15.75, 16, 16.25, 16.5, 16.75, 17, 17.25, 17.5, 17.75,
18, 18.25, 18.5, 18.75, 19, 19.25, 19.5, 19.75, 20, 20.25, 20.5,
20.75, 21, 21.25, 21.5, 21.75, 22, 22.25, 22.5, 22.75, 23, 23.25,
23.5, 23.75, 24, 24.25, 24.5, 24.75, 25, 25.25, 25.5, 25.75,
26, 26.25, 26.5, 26.75, 27, 27.25, 27.5, 27.75, 4, 4.25, 4.5,
4.75, 5, 5.25, 5.5, 5.75, 6, 6.25, 6.5, 6.75, 7, 7.25, 7.5, 7.75,
8, 8.25, 8.5, 8.75, 9, 9.25, 9.5, 9.75, 10, 10.25, 10.5, 10.75,
11, 11.25, 11.5, 11.75, 12, 12.25, 12.5, 12.75, 13, 13.25, 13.5,
13.75, 14, 14.25, 14.5, 14.75, 15, 15.25, 15.5, 15.75, 16, 16.25,
16.5, 16.75, 17, 17.25, 17.5, 17.75, 18, 18.25, 18.5, 18.75,
19, 19.25, 19.5, 19.75, 20, 20.25, 20.5, 20.75, 21, 21.25, 21.5,
21.75, 22, 22.25, 22.5, 22.75, 23, 23.25, 23.5, 23.75, 24, 24.25,
24.5, 24.75, 25, 25.25, 25.5, 25.75, 26, 26.25, 26.5, 26.75,
27, 27.25, 27.5, 27.75, 4, 4.25, 4.5, 4.75, 5, 5.25, 5.5, 5.75,
6, 6.25, 6.5, 6.75, 7, 7.25, 7.5, 7.75, 8, 8.25, 8.5, 8.75, 9,
9.25, 9.5, 9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75,
12, 12.25, 12.5, 12.75, 13, 13.25, 13.5, 13.75, 14, 14.25, 14.5,
14.75, 15, 15.25, 15.5, 15.75, 16, 16.25, 16.5, 16.75, 17, 17.25,
17.5, 17.75, 18, 18.25, 18.5, 18.75, 19, 19.25, 19.5, 19.75,
20, 20.25, 20.5, 20.75, 21, 21.25, 21.5, 21.75, 22, 22.25, 22.5,
22.75, 23, 23.25, 23.5, 23.75, 24, 24.25, 24.5, 24.75, 25, 25.25,
25.5, 25.75, 26, 26.25, 26.5, 26.75, 27, 27.25, 27.5, 27.75),
Total = c(6, 6, 6, 6, 7, 8, 10, 11, 19, 22, 27, 28, 44, 47,
56, 59, 100, 106, 135, 136, 173, 184, 191, 197, 200, 199,
203, 201, 198, 199, 202, 202, 193, 189, 182, 183, 155, 153,
153, 157, 183, 185, 185, 185, 185, 182, 173, 172, 158, 158,
140, 139, 125, 118, 108, 101, 68, 66, 54, 50, 37, 38, 32,
30, 26, 26, 26, 25, 24, 23, 23, 23, 25, 23, 21, 20, 15, 14,
14, 15, 11, 11, 10, 10, 10, 9, 9, 9, 10, 10, 10, 10, 10,
10, 9, 9, 8, 8, 8, 8, 10, 10, 14, 15, 20, 20, 27, 27, 45,
47, 59, 62, 104, 110, 137, 140, 179, 186, 202, 203, 206,
209, 209, 210, 205, 207, 211, 210, 200, 199, 194, 197, 169,
166, 176, 180, 193, 196, 197, 197, 192, 190, 180, 176, 162,
162, 153, 148, 124, 122, 106, 97, 64, 61, 57, 54, 38, 37,
38, 34, 32, 33, 31, 28, 24, 24, 22, 21, 20, 20, 17, 16, 13,
12, 10, 10, 9, 9, 8, 8, 8, 7, 8, 9, 9, 8, 8, 8, 8, 7, 7,
8, 7, 7, 7, 7, 10, 11, 14, 16, 22, 24, 27, 28, 45, 48, 63,
66, 104, 116, 141, 145, 191, 198, 209, 210, 215, 216, 218,
216, 216, 218, 221, 221, 206, 204, 194, 194, 180, 179, 184,
186, 206, 209, 208, 207, 204, 203, 196, 194, 179, 182, 168,
164, 131, 127, 115, 106, 66, 60, 57, 52, 39, 36, 36, 33,
32, 31, 29, 29, 22, 21, 18, 17, 16, 15, 14, 14, 12, 12, 12,
11, 9, 9, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5,
5, 5, 7, 7, 9, 9, 18, 20, 21, 21, 38, 39, 58, 61, 108, 116,
138, 141, 179, 185, 196, 196, 200, 205, 205, 201, 202, 204,
204, 202, 191, 188, 184, 188, 170, 172, 180, 178, 190, 191,
196, 195, 193, 194, 184, 180, 165, 166, 150, 149, 128, 123,
108, 99, 66, 66, 60, 55, 36, 36, 33, 34, 35, 35, 31, 31,
22, 22, 22, 22, 17, 17, 15, 14, 12, 12, 11, 10, 10, 10, 10,
10, 9, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 7, 7, 8, 8,
12, 12, 21, 21, 25, 27, 43, 44, 56, 59, 100, 110, 129, 132,
166, 172, 187, 189, 189, 191, 193, 192, 188, 194, 193, 192,
173, 173, 172, 176, 159, 154, 157, 163, 166, 167, 170, 169,
162, 161, 157, 156, 141, 142, 130, 125, 92, 91, 73, 68, 46,
47, 40, 35, 24, 23, 21, 19, 20, 20, 20, 21, 19, 19, 17, 17,
20, 19, 18, 18, 11, 11, 12, 11, 10, 10, 10, 10, 9, 7, 7,
7, 6, 6, 7, 7, 7, 7, 7, 6, 5, 6, 7, 7, 10, 9, 11, 12, 13,
14, 14, 15, 20, 20, 20, 21, 26, 26, 28, 29, 32, 33, 40, 40,
38, 37, 43, 43, 44, 43, 43, 44, 43, 41, 40, 39, 39, 40, 39,
38, 37, 37, 39, 41, 33, 34, 37, 36, 34, 34, 35, 33, 28, 28,
24, 24, 19, 19, 19, 18, 18, 19, 17, 15, 15, 15, 15, 15, 14,
14, 15, 15, 14, 13, 13, 13, 12, 11, 10, 9, 8, 8, 8, 6, 4,
4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 8,
7, 8, 8, 9, 14, 14, 15, 16, 18, 18, 17, 16, 18, 18, 20, 20,
21, 22, 25, 25, 30, 30, 29, 28, 25, 24, 23, 23, 22, 21, 21,
21, 20, 21, 23, 23, 23, 22, 21, 23, 19, 18, 19, 18, 19, 19,
21, 21, 16, 17, 16, 16, 17, 17, 19, 19, 19, 19, 20, 20, 15,
15, 17, 17, 18, 17, 16, 16, 13, 12, 12, 12, 11, 11, 10, 10,
9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8)), row.names = c(NA, -672L
), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
day = structure(1:7, .Label = c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"), class = "factor"),
.rows = list(1:96, 97:192, 193:288, 289:384, 385:480, 481:576,
577:672)), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
Комментарии:
1. Если человек работает несколько раз по 15 минут в день, учитывается ли этот человек за каждую четверть часа? Если это так, то при суммировании за день в знаменателе получается количество 15-минутных интервалов, отработанных за день. Если вам нужен процент от
people
, а не приращения по времени, вы бы использовали максимальное значениеTotal
за 7-дневный период. Пожалуйста, опишите, как следует вычислять знаменатель в процентах.
Ответ №1:
Диаграмма в другом ответе иллюстрирует процент людей, работающих с каждым 15-минутным шагом в день, учитывая общее количество 15-минутных приращений, отработанных в течение дня.
Если ось y должна представлять процент людей, работающих с любым 15-минутным интервалом в течение недели, знаменателем должно быть max(Total)
, максимальное количество людей, работающих с любым 15-минутным интервалом в течение 7-дневного периода времени в данных.
Другим подходом было бы использовать максимальное значение по дням, чтобы в течение 15-минутного приращения наибольшее количество людей, работавших в этот день, отображалось на графике как 100%.
Суммируя данные, мы можем увидеть, как будут рассчитаны знаменатели.
df2 %>%
group_by(`Day of the week`) %>%
summarise(.,max = max(Total))
… и вывод:
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 7 x 2
`Day of the week` max
<fct> <dbl>
1 Monday 203
2 Tuesday 211
3 Wednesday 221
4 Thursday 205
5 Friday 194
6 Saturday 44
7 Sunday 30
График, нормированный на максимальное количество работников в день, выглядит следующим образом, учитывая график, который был опубликован в исходном ответе:
df2 %>% group_by(`Day of the week`) %>%
mutate(Percent = Total / max(Total)) -> df3
ggplot(df3,aes(x=time,y=Percent,color=`Day of the week`))
geom_line()
scale_y_continuous(labels = scales::percent)
… и вывод:
График, нормированный на максимальное количество работников за все дни, выглядит следующим образом.
df2 %>% ungroup() %>%
mutate(Percent = Total / max(Total)) -> df3
ggplot(df3,aes(x=time,y=Percent,color=`Day of the week`))
geom_line()
scale_y_continuous(labels = scales::percent)
…и выходные данные, где мы можем четко видеть, что меньше людей работало по выходным.
Ответ №2:
Я бы предложил следующий подход. Если вы группируете по дням и времени, все проценты равны 1. Если сгруппировать по дням, вы получите это:
library(dplyr)
library(ggplot2)
#Compute
f2 %>% group_by(`Day of the week`) %>% mutate(Percentage=Total/sum(Total)) -> f3
#Plot
ggplot(f3,aes(x=time,y=Percentage,color=`Day of the week`))
geom_line()
scale_y_continuous(labels = scales::percent)
Вывод: