#r #data-modeling
#r #моделирование данных
Вопрос:
Я строю модель для своего набора данных. После выполнения bake() в полученной таблице отсутствуют один предиктор и целевая переменная (outcome).
Это происходит после добавления шагов в рецепт. Как можно решить эту проблему?
# Split the data into training and testing sets using initial_split()'s
# default settings.
top10_renewableEnergyProd_split <- initial_split(top10_renewableEnergyProd)
top10_renewableEnergyProd_train <- training(top10_renewableEnergyProd_split)
top10_renewableEnergyProd_test <- testing(top10_renewableEnergyProd_split)

# Declare the outcome (energyProd_2018) and its three predictors. Terms on the
# right-hand side of a formula must be joined with `+`; without the operators
# the expression does not parse.
top10_renewableEnergyProd_recipe <- recipe(
  energyProd_2018 ~ country_name + energyProd_2016 + energyProd_2017,
  data = top10_renewableEnergyProd_train
)

# Recipe steps ----
top10_renewableEnergyProd_recipe <- top10_renewableEnergyProd_recipe %>%
  # Center and scale every numeric column except the outcome.
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes()) %>%
  # NOTE(review): unlike the two steps above, this selector does NOT exclude
  # the outcome, so the correlation filter is free to drop energyProd_2018
  # together with any highly correlated predictor. To keep the outcome in the
  # baked data, use `step_corr(all_numeric(), -all_outcomes())`. A predictor
  # that is strongly correlated with another predictor will presumably still
  # be removed; raise `threshold` if that is not wanted.
  step_corr(all_numeric())

# Estimate the step parameters on the training set, then apply the trained
# recipe to that same training set.
top10_renewableEnergyProd_prep <- prep(
  top10_renewableEnergyProd_recipe,
  training = top10_renewableEnergyProd_train
)
top10_renewableEnergyProd_bake <- bake(
  top10_renewableEnergyProd_prep,
  top10_renewableEnergyProd_train
)
top10_renewableEnergyProd_bake
> dput(top10_renewableEnergyProd)
structure(list(type2 = c("Renewable", "Renewable", "Renewable",
"Renewable", "Renewable", "Renewable", "Renewable", "Renewable",
"Renewable", "Renewable"), country = c("DE", "ES", "FR", "IT",
"NO", "PL", "SE", "TR", "UA", "UK"), country_name = c("Germany",
"Spain", "France", "Italy", "Norway", "Poland", "Sweden", "Turkey",
"Ukraine", "United Kingdom"), energyProd_2016 = c(147622, 103353,
99885.054, 90756.826, 146557, 15468, 77505, 87090, 12097, 58909.047
), energyProd_2017 = c(175063, 84664, 93907.184, 86786.294, 146285,
18187.708, 82540, 83536.342, 12082.6, 73113.964), energyProd_2018 = c(185226.211,
99725.566, 113658.177, 96820, 146878.825, 15541.473, 77615.947,
93425.906, 13843.9, 79955.967)), row.names = c(NA, -10L), groups = structure(list(
country = c("DE", "ES", "FR", "IT", "NO", "PL", "SE", "TR",
"UA", "UK"), country_name = c("Germany", "Spain", "France",
"Italy", "Norway", "Poland", "Sweden", "Turkey", "Ukraine",
"United Kingdom"), type2 = c("Renewable", "Renewable", "Renewable",
"Renewable", "Renewable", "Renewable", "Renewable", "Renewable",
"Renewable", "Renewable"), .rows = structure(list(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
> dput(top10_renewableEnergyProd_bake)
structure(list(country_name = structure(c(2L, 5L, 1L, 3L, 4L,
6L, 7L, 8L), .Label = c("France", "Germany", "Italy", "Norway",
"Spain", "Sweden", "Turkey", "United Kingdom"), class = "factor"),
energyProd_2016 = c(1.47285970883518, 0.0604065991667829,
-0.0502421869332435, -0.341488748282317, 1.4388796649716,
-0.764303423167326, -0.458483028395103, -1.35762858619557
)), row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"
))