#r
Вопрос:
У меня есть набор данных с большим количеством значений NA, но я могу заполнить эти NA альтернативными названиями. Цель состоит в том, чтобы использовать эти названия в дальнейшем, например для объединения наборов данных по совпадающим значениям.
Однако я не могу присвоить этот символьный вектор, потому что его длина не совпадает с размером таблицы данных.
Например:
join_pop1[is.na(join_pop1$UK_Districts.y)] <- pop_names
Вход x имеет размер 19, но индекс
is.na(join_pop1$UK_Districts.y)
имеет размер 1437.
По сути, я хочу, чтобы каждое название было присвоено строкам со значением NA для каждого уникального города. Например, вот как выглядит мой набор данных:
# A tibble: 132 x 2
UK_Districts.x UK_Districts.y
<chr> <chr>
1 Abertawe - Swansea NA
2 Abertawe - Swansea NA
3 Abertawe - Swansea NA
4 Abertawe - Swansea NA
5 Brent London Boro NA
6 Brent London Boro NA
7 Brent London Boro NA
8 Brent London Boro NA
9 Bro Morgannwg - the Vale of Glamorgan NA
10 Bro Morgannwg - the Vale of Glamorgan NA
Ожидаемые результаты:
# A tibble: 132 x 2
UK_Districts.x UK_Districts.y
<chr> <chr>
1 Abertawe - Swansea Swansea
2 Abertawe - Swansea Swansea
3 Abertawe - Swansea Swansea
4 Abertawe - Swansea Swansea
5 Brent London Boro Brent
6 Brent London Boro Brent
7 Brent London Boro Brent
8 Brent London Boro Brent
9 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan
10 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan
Воспроизводимый код:
# Short district names that should replace the missing values
# (33 entries, in the order used below).
pop_names <- c(
  "Swansea", "Brent", "Vale of Glamorgan",
  "South Bucks", "Cardiff", "Caerphilly",
  "Newport", "Neath Port Talbot", "City of London",
  "Bristol, City of", "Derby", "Leicester",
  "Peterborough", "Plymouth", "Portsmouth",
  "Southampton", "Stoke-on-Trent", "Westminster",
  "Wolverhampton", "Herefordshire, County of", "Shepway",
  "Merthyr Tydfil", "Bridgend", "Pembrokeshire",
  "Ceredigion", "Denbighshire", "Monmouthshire",
  "Carmarthenshire", "Flintshire", "Isle of Anglesey",
  "Somerset", "Brighton and Hove", "Wrexham"
)
# Reproducible example data: a 132 x 2 tibble-classed data frame.
# UK_Districts.x holds 33 long district labels, each repeated four times in a
# row (some labels carry a trailing space, which is kept on purpose);
# UK_Districts.y is entirely missing (character NA).
join_pop1 <- structure(
  list(
    UK_Districts.x = rep(
      c(
        "Abertawe - Swansea",
        "Brent London Boro",
        "Bro Morgannwg - the Vale of Glamorgan",
        "Buckinghamshire",
        "Caerdydd - Cardiff",
        "Caerffili - Caerphilly",
        "Casnewydd - Newport",
        "Castell-nedd Port Talbot - Neath Port Talbot",
        "City and County of the City of London",
        "City of Bristol ",
        "City of Derby ",
        "City of Leicester ",
        "City of Peterborough ",
        "City of Plymouth ",
        "City of Portsmouth ",
        "City of Southampton ",
        "City of Stoke-on-Trent ",
        "City of Westminster London Boro",
        "City of Wolverhampton ",
        "County of Herefordshire",
        "Folkestone and Hythe",
        "Merthyr Tudful - Merthyr Tydfil",
        "Pen-y-bont ar Ogwr - Bridgend",
        "Sir Benfro - Pembrokeshire",
        "Sir Ceredigion - Ceredigion",
        "Sir Ddinbych - Denbighshire",
        "Sir Fynwy - Monmouthshire",
        "Sir Gaerfyrddin - Carmarthenshire",
        "Sir y Fflint - Flintshire",
        "Sir Ynys Mon - Isle of Anglesey",
        "Somerset West and Taunton",
        "The City of Brighton and Hove ",
        "Wrecsam - Wrexham"
      ),
      each = 4L
    ),
    UK_Districts.y = rep(NA_character_, 132L)
  ),
  row.names = c(NA, -132L),
  class = c("tbl_df", "tbl", "data.frame")
)
Ответ №1:
Мы могли бы использовать str_extract
из пакета stringr.
Сначала мы создаем шаблон (регулярное выражение) из pop_names:
library(dplyr)
library(stringr)

# Build one regular expression that matches any of the candidate names
# ("Swansea|Brent|...").
pattern <- paste(as.character(pop_names), collapse = "|")

# Fill only the missing entries of UK_Districts.y: coalesce() keeps a value
# that is already present and otherwise falls back to the first candidate name
# found in UK_Districts.x. Rows whose label contains none of the names stay NA.
join_pop1 %>%
  mutate(
    UK_Districts.y = coalesce(
      UK_Districts.y,
      str_extract(UK_Districts.x, pattern)
    )
  )
выход:
UK_Districts.x UK_Districts.y
<chr> <chr>
1 Abertawe - Swansea Swansea
2 Abertawe - Swansea Swansea
3 Abertawe - Swansea Swansea
4 Abertawe - Swansea Swansea
5 Brent London Boro Brent
6 Brent London Boro Brent
7 Brent London Boro Brent
8 Brent London Boro Brent
9 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan
10 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan
# ... with 122 more rows
Комментарии:
1. Мне нравится такой подход! Буду иметь это в виду на будущее.
Ответ №2:
С помощью base R
мы можем использовать regmatches/regexpr
после построения pattern
# Build one regular expression that matches any of the candidate names.
pattern <- paste(pop_names, collapse = "|")

# regexpr() reports -1 for elements without a match, and regmatches() drops
# those elements from its result. Assigning that shorter vector straight into
# the column fails as soon as one district has no match, so first expand the
# matches to a full-length vector with NA for non-matching rows.
hits <- regexpr(pattern, join_pop1$UK_Districts.x)
found <- !is.na(hits) & hits != -1L
extracted <- rep(NA_character_, nrow(join_pop1))
extracted[found] <- regmatches(join_pop1$UK_Districts.x, hits)

# Only fill entries that are currently missing; existing values are kept.
to_fill <- is.na(join_pop1$UK_Districts.y)
join_pop1$UK_Districts.y[to_fill] <- extracted[to_fill]