#r #merge
#r #слияние
Вопрос:
Я пытаюсь добавить данные ВВП в имеющийся у меня фрейм данных. Я включил первые 5 строк фреймов данных для справки.
Я хотел бы добавить ВВП каждого государства по годам. В моем фрейме данных у меня есть столбцы «цикл» и «состояние», которые я хотел бы использовать для «сопоставления» соответствующего значения ВВП, но, похоже, я не могу понять, как это сделать, потому что информация о ВВП представлена в широком формате.
Любая помощь будет оценена!
candidates <- structure(list(Cycle = c("1990", "1990", "1990", "1990", "1990"
), CandidateName = c("Robert Palmer", "David Elliott Smith",
"Gary A Condit", "Cliff Burris", "Jerry M Reiss"), State = c("CA",
"CA", "CA", "CA", "CA"), VoteShare = c(0.336755826502931, 0.356542953604619,
0.661849966957576, 0.338150033042424, 0.220356681141645), stateGDP = c("NA",
"NA", "NA", "NA", "NA")), row.names = c(NA, 5L), class = "data.frame")
stateGDP <- structure(list(X1 = c(1998, 2000, 2002, 2004, 2006), USA = c(5.7,
6.5, 3.4, 6.6, 6), AK = c(5.1, 3.7, 4.4, 9.6, 5.1), AL = c(-6.1,
8.3, 4.2, 10, 11.7), AZ = c(8.3, 6, 4.6, 6.6, 8.8), AR = c(4.3,
2.9, 4.7, 8.2, 5.3), CA = c(7.2, 9.6, 3.8, 6.9, 6.9), CO = c(10,
10, 1.8, 3.8, 5.1), CT = c(4.9, 8.9, 1.8, 9.6, 6.8), DE = c(11.6,
7.5, -1.5, 7.7, 5.6), DC = c(4.6, 3.8, 6.8, 8.6, 3.8), FL = c(7.1,
6.8, 6.2, 8.9, 6.8), GA = c(9.2, 6.4, 2.6, 7, 4), HI = c(-0.1,
5.7, 6, 9.9, 6.3), ID = c(5.2, 11.3, 3.6, 8.4, 6.3), IL = c(4.9,
6.1, 2.4, 5.5, 6), IN = c(8.2, 5.2, 3.8, 6, 4.6), IA = c(2.4,
6.1, 4.4, 11, 3.8), KS = c(5.1, 4.9, 2.9, 3.2, 8.4), KY = c(4.8,
-0.8, 4.1, 5.7, 5.9), LA = c(3.4, 5.4, 1.2, 10, 4.2), ME = c(5,
7.2, 4.4, 6.4, 4.5), MD = c(6.7, 6.6, 6.3, 7.9, 5.1), MA = c(5,
9.5, 2.4, 4.9, 4.5), MI = c(4.3, 4.2, 3.9, 2, 0.3), MN = c(6.7,
8.8, 4.2, 7, 2.8), MS = c(4.5, 2.8, 2.7, 5.3, 6), MO = c(3.8,
5.6, 2.8, 5, 3.7), MT = c(5.9, 5.1, 4.5, 9, 7.9), NE = c(3.9,
5.2, 3.6, 5.8, 5.7), NV = c(7.8, 7.1, 5.4, 15, 8.2), NH = c(6.1,
8, 4.4, 5.3, 5.4), NJ = c(3.9, 7.5, 3.9, 4.4, 5.4), NM = c(-3.5,
4, 3.7, 10.9, 5.1), NY = c(3.7, 5.7, 1.1, 5.5, 5.6), NC = c(5.1,
4.4, 3.3, 6.1, 8.8), ND = c(6.8, 5.8, 6.8, 4.9, 7), OH = c(5.3,
4, 4.1, 4.9, 2.4), OK = c(2.7, 8, 2.5, 7.5, 10.1), OR = c(4.2,
9, 3.4, 7.1, 8), PA = c(4.7, 4.6, 2.8, 5.9, 4.1), RI = c(6.2,
6.9, 5.9, 7.4, 5.6), SC = c(6.4, 4.5, 4.1, 3.7, 5.6), SD = c(6.2,
7.8, 12, 8, 4), TN = c(8.4, 3.3, 5.1, 7, 5.3), TX = c(5.8, 8.4,
1.8, 9.2, 10), UT = c(8, 6.5, 3.8, 8.3, 12.7), VT = c(4.3, 6.7,
4.6, 7.3, 3.4), VA = c(7.3, 6.8, 3.3, 7.2, 5.2), WA = c(8.4,
3.2, 3.7, 4.6, 7.1), WV = c(3.2, 1.9, 3.3, 5.6, 6.1), WI = c(6.1,
4.8, 3.7, 5.9, 4.4), WY = c(1.4, 9.5, 2.4, 10.3, 19.2)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
Комментарии:
1. Измените формат с широкого на длинный, затем объедините как обычно
Ответ №1:
Измените формат с широкого на длинный, затем объедините, как обычно.
С вашими примерами данных совпадений нет, но приведенное ниже должно работать для ваших полных данных:
merge(candidates,
pivot_longer(stateGDP, cols = -1, names_to = "State", values_to = "GDP"),
by.x = c("Cycle", "State"), by.y = c("X1", "State"))
Ответ №2:
Вот tidyverse
версия, в которой я немного изменил ваши входные данные, чтобы убедиться, что совпадения были…
library(dplyr)
library(tidyr)
candidates$stateGDP <- NULL
stateGDP_long <-
stateGDP %>%
pivot_longer(cols = c(AK:WY),
names_to = "State",
values_to = "stateGDP") %>%
mutate(Cycle = as.character(X1)) %>%
select(-USA, -X1)
left_join(candidates, stateGDP_long)
#> Joining, by = c("Cycle", "State")
#> Cycle CandidateName State VoteShare stateGDP
#> 1 1990 Robert Palmer CA 0.3367558 7.2
#> 2 1990 David Elliott Smith CA 0.3565430 7.2
#> 3 1990 Gary A Condit CA 0.6618500 7.2
#> 4 1990 Cliff Burris CA 0.3381500 7.2
#> 5 1990 Jerry M Reiss CA 0.2203567 7.2
Ваши данные слегка изменены
candidates <- structure(list(Cycle = c("1990", "1990", "1990", "1990", "1990"
), CandidateName = c("Robert Palmer", "David Elliott Smith",
"Gary A Condit", "Cliff Burris", "Jerry M Reiss"), State = c("CA",
"CA", "CA", "CA", "CA"), VoteShare = c(0.336755826502931, 0.356542953604619,
0.661849966957576, 0.338150033042424, 0.220356681141645), stateGDP = c("NA",
"NA", "NA", "NA", "NA")), row.names = c(NA, 5L), class = "data.frame")
stateGDP <- structure(list(X1 = c(1990, 1991, 1992, 1993, 1994), USA = c(5.7,
6.5, 3.4, 6.6, 6), AK = c(5.1, 3.7, 4.4, 9.6, 5.1), AL = c(-6.1,
8.3, 4.2, 10, 11.7), AZ = c(8.3, 6, 4.6, 6.6, 8.8), AR = c(4.3,
2.9, 4.7, 8.2, 5.3), CA = c(7.2, 9.6, 3.8, 6.9, 6.9), CO = c(10,
10, 1.8, 3.8, 5.1), CT = c(4.9, 8.9, 1.8, 9.6, 6.8), DE = c(11.6,
7.5, -1.5, 7.7, 5.6), DC = c(4.6, 3.8, 6.8, 8.6, 3.8), FL = c(7.1,
6.8, 6.2, 8.9, 6.8), GA = c(9.2, 6.4, 2.6, 7, 4), HI = c(-0.1,
5.7, 6, 9.9, 6.3), ID = c(5.2, 11.3, 3.6, 8.4, 6.3), IL = c(4.9,
6.1, 2.4, 5.5, 6), IN = c(8.2, 5.2, 3.8, 6, 4.6), IA = c(2.4,
6.1, 4.4, 11, 3.8), KS = c(5.1, 4.9, 2.9, 3.2, 8.4), KY = c(4.8,
-0.8, 4.1, 5.7, 5.9), LA = c(3.4, 5.4, 1.2, 10, 4.2), ME = c(5,
7.2, 4.4, 6.4, 4.5), MD = c(6.7, 6.6, 6.3, 7.9, 5.1), MA = c(5,
9.5, 2.4, 4.9, 4.5), MI = c(4.3, 4.2, 3.9, 2, 0.3), MN = c(6.7,
8.8, 4.2, 7, 2.8), MS = c(4.5, 2.8, 2.7, 5.3, 6), MO = c(3.8,
5.6, 2.8, 5, 3.7), MT = c(5.9, 5.1, 4.5, 9, 7.9), NE = c(3.9,
5.2, 3.6, 5.8, 5.7), NV = c(7.8, 7.1, 5.4, 15, 8.2), NH = c(6.1,
8, 4.4, 5.3, 5.4), NJ = c(3.9, 7.5, 3.9, 4.4, 5.4), NM = c(-3.5,
4, 3.7, 10.9, 5.1), NY = c(3.7, 5.7, 1.1, 5.5, 5.6), NC = c(5.1,
4.4, 3.3, 6.1, 8.8), ND = c(6.8, 5.8, 6.8, 4.9, 7), OH = c(5.3,
4, 4.1, 4.9, 2.4), OK = c(2.7, 8, 2.5, 7.5, 10.1), OR = c(4.2,
9, 3.4, 7.1, 8), PA = c(4.7, 4.6, 2.8, 5.9, 4.1), RI = c(6.2,
6.9, 5.9, 7.4, 5.6), SC = c(6.4, 4.5, 4.1, 3.7, 5.6), SD = c(6.2,
7.8, 12, 8, 4), TN = c(8.4, 3.3, 5.1, 7, 5.3), TX = c(5.8, 8.4,
1.8, 9.2, 10), UT = c(8, 6.5, 3.8, 8.3, 12.7), VT = c(4.3, 6.7,
4.6, 7.3, 3.4), VA = c(7.3, 6.8, 3.3, 7.2, 5.2), WA = c(8.4,
3.2, 3.7, 4.6, 7.1), WV = c(3.2, 1.9, 3.3, 5.6, 6.1), WI = c(6.1,
4.8, 3.7, 5.9, 4.4), WY = c(1.4, 9.5, 2.4, 10.3, 19.2)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))