Присвоение повторяющихся значений из вектора в качестве значений другого столбца

#r

Вопрос:

У меня есть набор данных с большим количеством значений NA, которые я могу заполнить альтернативными именами. Цель состоит в том, чтобы затем использовать эти имена, например, для объединения наборов данных по совпадающим значениям.

Однако я не могу присвоить этот символьный вектор, потому что его длина не совпадает с размером таблицы данных.

Например:

 # Attempt from the question that raises the size-mismatch error quoted below.
 # NOTE(review): the logical mask is applied to the whole tibble rather than to
 # the UK_Districts.y column -- presumably why the reported sizes differ; confirm.
 join_pop1[is.na(join_pop1$UK_Districts.y)] <- pop_names
 

Вход x имеет размер 19, но индекс is.na(join_pop1$UK_Districts.y) имеет размер 1437.

По сути, я хочу, чтобы каждое имя было присвоено значениям NA соответствующего уникального города. Например, вот как выглядит мой набор данных:

 # A tibble: 132 x 2
   UK_Districts.x                        UK_Districts.y
   <chr>                                 <chr>         
 1 Abertawe - Swansea                    NA            
 2 Abertawe - Swansea                    NA            
 3 Abertawe - Swansea                    NA            
 4 Abertawe - Swansea                    NA            
 5 Brent London Boro                     NA            
 6 Brent London Boro                     NA            
 7 Brent London Boro                     NA            
 8 Brent London Boro                     NA            
 9 Bro Morgannwg - the Vale of Glamorgan NA            
10 Bro Morgannwg - the Vale of Glamorgan NA       

 

Ожидаемые результаты:

 # A tibble: 132 x 2
   UK_Districts.x                        UK_Districts.y
   <chr>                                 <chr>         
 1 Abertawe - Swansea                    Swansea            
 2 Abertawe - Swansea                    Swansea       
 3 Abertawe - Swansea                    Swansea       
 4 Abertawe - Swansea                    Swansea       
 5 Brent London Boro                     Brent            
 6 Brent London Boro                     Brent            
 7 Brent London Boro                     Brent            
 8 Brent London Boro                     Brent            
 9 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan            
10 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan       
 

Воспроизводимый код:

 # Replacement names, one per distinct district and in the same order as the
# districts appear in join_pop1$UK_Districts.x below.
pop_names <- c(
  "Swansea",
  "Brent",
  "Vale of Glamorgan",
  "South Bucks",
  "Cardiff",
  "Caerphilly",
  "Newport",
  "Neath Port Talbot",
  "City of London",
  "Bristol, City of",
  "Derby",
  "Leicester",
  "Peterborough",
  "Plymouth",
  "Portsmouth",
  "Southampton",
  "Stoke-on-Trent",
  "Westminster",
  "Wolverhampton",
  "Herefordshire, County of",
  "Shepway",
  "Merthyr Tydfil",
  "Bridgend",
  "Pembrokeshire",
  "Ceredigion",
  "Denbighshire",
  "Monmouthshire",
  "Carmarthenshire",
  "Flintshire",
  "Isle of Anglesey",
  "Somerset",
  "Brighton and Hove",
  "Wrexham"
)

# Sample tibble: 33 districts, each repeated on 4 consecutive rows (132 rows),
# with UK_Districts.y entirely missing. Trailing spaces inside some district
# names are part of the data and are kept as-is.
join_pop1 <- structure(
  list(
    UK_Districts.x = rep(
      c(
        "Abertawe - Swansea",
        "Brent London Boro",
        "Bro Morgannwg - the Vale of Glamorgan",
        "Buckinghamshire",
        "Caerdydd - Cardiff",
        "Caerffili - Caerphilly",
        "Casnewydd - Newport",
        "Castell-nedd Port Talbot - Neath Port Talbot",
        "City and County of the City of London",
        "City of Bristol ",
        "City of Derby ",
        "City of Leicester ",
        "City of Peterborough ",
        "City of Plymouth ",
        "City of Portsmouth ",
        "City of Southampton ",
        "City of Stoke-on-Trent ",
        "City of Westminster London Boro",
        "City of Wolverhampton  ",
        "County of Herefordshire",
        "Folkestone and Hythe",
        "Merthyr Tudful - Merthyr Tydfil",
        "Pen-y-bont ar Ogwr - Bridgend",
        "Sir Benfro - Pembrokeshire",
        "Sir Ceredigion - Ceredigion",
        "Sir Ddinbych - Denbighshire",
        "Sir Fynwy - Monmouthshire",
        "Sir Gaerfyrddin - Carmarthenshire",
        "Sir y Fflint - Flintshire",
        "Sir Ynys Mon - Isle of Anglesey",
        "Somerset West and Taunton",
        "The City of Brighton and Hove ",
        "Wrecsam - Wrexham"
      ),
      each = 4L
    ),
    UK_Districts.y = rep(NA_character_, times = 132L)
  ),
  row.names = c(NA, -132L),
  class = c("tbl_df", "tbl", "data.frame")
)


 

Ответ №1:

Можно использовать str_extract из пакета stringr. Сначала создаём шаблон из pop_names:

 library(dplyr)
library(stringr)

# Build a single alternation pattern ("Swansea|Brent|...") from the candidate
# names. The names are used as regular expressions without escaping, so this
# assumes they contain no regex metacharacters.
pattern <- paste(pop_names, collapse = "|")

# Fill only the missing entries: coalesce() keeps any value already present in
# UK_Districts.y and falls back to the first candidate name found inside
# UK_Districts.x. Without it, rows that already hold a value would be
# overwritten (with NA whenever nothing matches).
# Districts whose replacement name is not a substring of UK_Districts.x
# (e.g. "Buckinghamshire" vs "South Bucks") stay NA and need an explicit lookup.
join_pop1 %>%
  mutate(
    UK_Districts.y = coalesce(
      UK_Districts.y,
      str_extract(UK_Districts.x, pattern)
    )
  )
 

выход:

  UK_Districts.x                        UK_Districts.y   
   <chr>                                 <chr>            
 1 Abertawe - Swansea                    Swansea          
 2 Abertawe - Swansea                    Swansea          
 3 Abertawe - Swansea                    Swansea          
 4 Abertawe - Swansea                    Swansea          
 5 Brent London Boro                     Brent            
 6 Brent London Boro                     Brent            
 7 Brent London Boro                     Brent            
 8 Brent London Boro                     Brent            
 9 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan
10 Bro Morgannwg - the Vale of Glamorgan Vale of Glamorgan
# ... with 122 more rows
 

Комментарии:

1. Мне нравится такой подход! Буду иметь это в виду на будущее.

Ответ №2:

С помощью base R можно использовать regmatches/regexpr после построения pattern:

 # Build a single alternation pattern ("Swansea|Brent|...") from the candidate
# names (used as regular expressions, unescaped).
pattern <- paste(pop_names, collapse = "|")

# Position of the first candidate name inside each district string
# (-1 when nothing matches, NA for NA input).
hits <- regexpr(pattern, join_pop1$UK_Districts.x)

# regmatches() silently drops non-matching elements, so its result is shorter
# than the column whenever any district has no match. Write the matches into a
# full-length vector so rows stay aligned and unmatched rows remain NA.
extracted <- rep(NA_character_, length(join_pop1$UK_Districts.x))
found <- which(!is.na(hits) & hits > -1L)
extracted[found] <- regmatches(join_pop1$UK_Districts.x, hits)

# Only fill entries that are currently missing; existing values are kept.
to_fill <- is.na(join_pop1$UK_Districts.y)
join_pop1$UK_Districts.y[to_fill] <- extracted[to_fill]