#r #dataframe
Вопрос:
Я пытаюсь создать набор данных с длинным типом структуры, с 2 переменными между объектами и 2 переменными внутри объектов из таблицы Excel. Текущая структура набора данных выглядит следующим образом:
gt; str(Subset_0) 'data.frame': 54 obs. of 11 variables: $ Subject : num 1 2 3 4 5 6 7 8 9 10 ... $ BETWEEN1: num 1 1 1 2 2 2 2 1 1 2 ... $ BETWEEN2: num 1 1 2 2 2 2 1 1 1 1 ... $ A_x1 : num 5 1 3 1 0 6 1 2 7 1 ... $ B_x2 : num 5 1 3 0 3 0 0 2 6 1 ... $ C_y1 : num 6 9 9 2 2 4 2 2 6 0 ... $ D_y2 : num 6 15 4 1 2 4 3 1 3 0 ... $ K_x1 : num 5 1 3 1 0 6 1 2 7 1 ... $ L_x2 : num 5 1 3 0 3 0 0 2 6 1 ... $ M_y1 : num 6 9 9 2 2 4 2 2 6 14 ... $ N_y2 : num 3 1 0 4 0 5 6 5 17 21 ...
файл данных из dput
:
structure(list(Subject = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55), BETWEEN1 = c(1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), BETWEEN2 = c(1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), A_x1 = c(5, 1, 3, 1, 0, 6, 1, 2, 7, 1, 1, 0, 0, 2, 0, 8, NA, NA, NA, NA, 14, 23, 19, 10, 9, 10, 11, 14, 16, 8, 24, 17, 8, 22, 14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), B_x2 = c(5, 1, 3, 0, 3, 0, 0, 2, 6, 1, 0, 0, 0, 0, 1, 7, 14, 23, 19, 10, 14, 29, 15, 7, 13, 16, 7, 9, 17, 6, 7, 16, 6, 11, 13, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), C_y1 = c(6, 9, 9, 2, 2, 4, 2, 2, 6, 0, 6, 0, 1, 10, 3, 8, 14, 29, 15, 7, 17, 21, 24, 7, 32, 31, 31, 21, 27, 29, 18, 27, 33, 23, 28, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), D_y2 = c(6, 15, 4, 1, 2, 4, 3, 1, 3, 0, 0, 0, 2, 2, 2, 5, 17, 21, 24, 7, 24, 16, 28, 7, 28, 23, 25, 25, 24, 28, 33, 27, 31, 33, 21, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), K_x1 = c(5, 1, 3, 1, 0, 6, 1, 2, 7, 1, 1, 0, 0, 2, 0, 8, 24, 16, 28, 7, 24, 31, 31, 13, 32, 35, 32, 22, 29, 32, 32, 29, 34, 32, 34, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), L_x2 = c(5, 1, 3, 0, 3, 0, 0, 2, 6, 1, 0, 0, 0, 0, 1, 7, 24, 31, 31, 13, 30, 30, 34, 12, 31, 27, 23, 25, 33, 28, 31, 29, 30, 36, 24, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), M_y1 = c(6, 9, 9, 2, 2, 4, 2, 2, 6, 14, 23, 19, 10, 9, 10, 11, 14, 16, 8, 24, 17, 8, 22, 14, 33, 28, 31, 14, 23, 19, 10, 9, 10, 11, 14, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), N_y2 = c(3, 1, 0, 4, 0, 5, 6, 5, 17, 21, 24, 7, 32, 31, 31, 21, NA, NA, NA, NA, 27, 29, 18, 27, NA, NA, 17, 21, 24, 7, 32, 31, 31, 21, 27, 17, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, -54L))
Мне нужно разделить его по теме и условиям: по одному на столбец со значениями A, B, C и D в одном столбце и назвать его «Первым»; и K, L, M, N в другом и назвать его «Вторым». Более того, x, y, _1 и _2, присутствующие в этих переменных, представляют факторы внутри субъекта, которые мне также необходимо учитывать в других двух столбцах- «В пределах 1» для x и y; и «В пределах 2» для 1 и 2. И, наконец, с двумя столбцами «Между 1» и «Между 2», которые являются факторами между субъектами.
Мне нужно, чтобы это выглядело так:
Subject First SecondI Within2 Within2 Between1 Between2 1 Ai Ki 1 x 1 1 1 Bi Li 2 x 1 1 1 Ci Mi 1 y 1 1 1 Di Ni 2 y 1 1 2 Ai Ki 1 x 1 1 2 Bi Li 2 x 1 1 2 Ci Mi 1 y 1 1 2 Di Ni 2 y 1 1 ...
Я использовал эту reshape
функцию дважды, один раз для группировки в один столбец A, B,C,D и отделения от нее переменных внутри темы,и мне это удалось:
Subset_1 lt;-reshape(Subset_0, varying = c("A_x1", " B_x2", "C_y1", "D_y2"), v.names = "First", timevar = "Within1", times = c("A_x1", " B_x2", "C_y1", "D_y2"), direction = "long") # Next_Trial_Choice column Subset_1$Within1[Subset_1$Within1== "A_x1"] lt;- "x" Subset_1$Within1[Subset_1$Within1== "B_x2"] lt;- "x" Subset_1$Within1[Subset_1$Within1== "C_y1"] lt;- "y" Subset_1$Within1[Subset_1$Within1== "D_y2"] lt;- "y" #cleaning the names - opponent column Subset_1$Within2[Subset_1$Within2== "A_x1"] lt;- "1" Subset_1$Within2[Subset_1$Within2== "B_x2"] lt;- "2" Subset_1$Within2[Subset_1$Within2== "C_y1"] lt;- "1" Subset_1$Within2[Subset_1$Within2== "D_y2"] lt;- "2"
Проблема в том, что мне нужно сделать то же самое для другого столбца («Второй»), и я попытался снова использовать reshape, как делал раньше, на этот раз применительно к подмножеству 1. Но это не делает того, что мне нужно.
Есть ли способ сделать это?
Комментарии:
1. Добро пожаловать в Stack Overflow. Это один очень хорошо написанный первый вопрос!
Ответ №1:
Это выглядит так, как будто он получает ваш приведенный пример результата:
# pipe library(magrittr) # input data dxyz lt;- structure(list(Subject = c( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55 ), BETWEEN1 = c( 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), BETWEEN2 = c( 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), A_x1 = c( 5, 1, 3, 1, 0, 6, 1, 2, 7, 1, 1, 0, 0, 2, 0, 8, NA, NA, NA, NA, 14, 23, 19, 10, 9, 10, 11, 14, 16, 8, 24, 17, 8, 22, 14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), B_x2 = c( 5, 1, 3, 0, 3, 0, 0, 2, 6, 1, 0, 0, 0, 0, 1, 7, 14, 23, 19, 10, 14, 29, 15, 7, 13, 16, 7, 9, 17, 6, 7, 16, 6, 11, 13, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), C_y1 = c( 6, 9, 9, 2, 2, 4, 2, 2, 6, 0, 6, 0, 1, 10, 3, 8, 14, 29, 15, 7, 17, 21, 24, 7, 32, 31, 31, 21, 27, 29, 18, 27, 33, 23, 28, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), D_y2 = c( 6, 15, 4, 1, 2, 4, 3, 1, 3, 0, 0, 0, 2, 2, 2, 5, 17, 21, 24, 7, 24, 16, 28, 7, 28, 23, 25, 25, 24, 28, 33, 27, 31, 33, 21, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), K_x1 = c( 5, 1, 3, 1, 0, 6, 1, 2, 7, 1, 1, 0, 0, 2, 0, 8, 24, 16, 28, 7, 24, 31, 31, 13, 32, 35, 32, 22, 29, 32, 32, 29, 34, 32, 34, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), L_x2 = c( 5, 1, 3, 0, 3, 0, 0, 2, 6, 1, 0, 0, 0, 0, 1, 7, 24, 31, 31, 13, 30, 30, 34, 12, 31, 27, 23, 25, 33, 28, 31, 29, 30, 36, 24, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), M_y1 = c( 6, 9, 9, 2, 2, 4, 2, 2, 6, 14, 23, 19, 10, 9, 10, 11, 14, 16, 8, 24, 17, 8, 22, 14, 33, 28, 31, 14, 23, 19, 10, 9, 10, 11, 14, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), N_y2 = c( 3, 1, 0, 4, 0, 5, 6, 5, 17, 21, 24, 7, 32, 31, 31, 21, NA, NA, NA, NA, 27, 29, 18, 27, NA, NA, 17, 21, 24, 7, 32, 31, 31, 21, 27, 17, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA )), class = "data.frame", row.names = c( NA, -54L )) # extract all abcd in long format with Within seperated abcd lt;- dxyz %gt;% tidyr::pivot_longer(-c(Subject, BETWEEN1,BETWEEN2)) %gt;% tidyr::separate(col = name, sep = "_", into = c("First", "Within")) %gt;% dplyr::filter(First %in% c("A", "B", "C", "D")) %gt;% dplyr::mutate( Within21 = stringr::str_extract_all(Within, "[:digit:]") %gt;% unlist(), Within22 = stringr::str_extract_all(Within, "[:alpha:]") %gt;% unlist() ) %gt;% dplyr::select(-Within) # extract all klmn in long format with Within seperated klmn lt;- dxyz %gt;% tidyr::pivot_longer(-c(Subject, BETWEEN1,BETWEEN2)) %gt;% tidyr::separate(col = name, sep = "_", into = c("Second", "Within")) %gt;% dplyr::filter(Second %in% c("K", "L", "M", "N"))%gt;% dplyr::mutate( Within21 = stringr::str_extract_all(Within, "[:digit:]") %gt;% unlist(), Within22 = stringr::str_extract_all(Within, "[:alpha:]") %gt;% unlist() ) %gt;% dplyr::select(-Within) # join both data sets together abcd %gt;% dplyr::left_join( klmn, by = c("Subject", "BETWEEN1", "BETWEEN2", "Within21", "Within22") ) %gt;% dplyr::select( Subject, First, Second, Within21, Within22, BETWEEN1, BETWEEN2, value.x, value.y )
Я разделил преобразование на две части для A, B, C, D и K, L, M, N, а затем объединил данные вместе.
Ответ №2:
вот один из вариантов с pivot_longer
. Я знаю, что разделил немного слишком много, но это просто для того, чтобы устранить путаницу с именами. Вы можете настроить их в соответствии с вашими выходными данными.
library(tidyr) df %gt;% pivot_longer(cols=c("A_x1", "B_x2", "C_y1", "D_y2"), names_to="first") %gt;% pivot_longer(cols=c("K_x1", "L_x2", "M_y1", "N_y2"), names_to="second",values_to = "value2") %gt;% separate(first, into = c("first", "Within1"), sep = "_") %gt;% separate(Within1,into = c("Within1", "Within1_2"), sep = "(?lt;=[A-Za-z])(?=[0-9])") %gt;% separate(second, into = c("second", "Within2"), sep = "_") %gt;% separate(Within2,into = c("Within2", "Within2_2"), sep = "(?lt;=[A-Za-z])(?=[0-9])") %gt;% select(-c(value, value2)) %gt;% distinct()