#r #dplyr #forecast #fable-r #tsibble
Вопрос:
Ниже приведены мой исходный набор данных (data frame `Original.df`) и подогнанная модель:
# Weekly production data: fit dynamic harmonic regression models (ARIMA errors
# with Fourier terms, K = 1..25), pick the one with the lowest AICc, forecast
# 30 weeks ahead and extract the fitted values of that best model.
library(tsibble)
library(tibble)
library(ISOweek)
library(fable)
library(forecast)
library(fpp3)
library(dplyr)
library(tidyverse)

# Raw data as produced by dput(): one row per ISO week, 2019-W01 .. 2021-W43.
Original.df <- structure(list(
  YearWeek = c(
    "201901", "201902", "201903", "201904", "201905", "201906", "201907",
    "201908", "201909", "201910", "201911", "201912", "201913", "201914",
    "201915", "201916", "201917", "201918", "201919", "201920", "201921",
    "201922", "201923", "201924", "201925", "201926", "201927", "201928",
    "201929", "201930", "201931", "201932", "201933", "201934", "201935",
    "201936", "201937", "201938", "201939", "201940", "201941", "201942",
    "201943", "201944", "201945", "201946", "201947", "201948", "201949",
    "201950", "201951", "201952", "202001", "202002", "202003", "202004",
    "202005", "202006", "202007", "202008", "202009", "202010", "202011",
    "202012", "202013", "202014", "202015", "202016", "202017", "202018",
    "202019", "202020", "202021", "202022", "202023", "202024", "202025",
    "202026", "202027", "202028", "202029", "202030", "202031", "202032",
    "202033", "202034", "202035", "202036", "202037", "202038", "202039",
    "202040", "202041", "202042", "202043", "202044", "202045", "202046",
    "202047", "202048", "202049", "202050", "202051", "202052", "202053",
    "202101", "202102", "202103", "202104", "202105", "202106", "202107",
    "202108", "202109", "202110", "202111", "202112", "202113", "202114",
    "202115", "202116", "202117", "202118", "202119", "202120", "202121",
    "202122", "202123", "202124", "202125", "202126", "202127", "202128",
    "202129", "202130", "202131", "202132", "202133", "202134", "202135",
    "202136", "202137", "202138", "202139", "202140", "202141", "202142",
    "202143"
  ),
  Shipment = c(
    418, 1442, 1115, 1203, 1192, 1353, 1191, 1411, 933, 1384, 1362, 1353,
    1739, 1751, 1595, 1380, 1711, 2058, 1843, 1602, 2195, 2159, 2009, 1812,
    2195, 1763, 821, 1892, 1781, 2071, 1789, 1789, 1732, 1384, 1435, 1247,
    1839, 2034, 1963, 1599, 1596, 1548, 1084, 1350, 1856, 1882, 1979, 1021,
    1311, 2031, 1547, 591, 724, 1535, 1268, 1021, 1269, 1763, 1275, 1411,
    1847, 1379, 1606, 1473, 1180, 926, 800, 840, 1375, 1755, 1902, 1921,
    1743, 1275, 1425, 1088, 1416, 1168, 842, 1185, 1570, 1435, 1209, 1470,
    1368, 1926, 1233, 1189, 1245, 1465, 1226, 887, 1489, 1369, 1358, 1179,
    1200, 1226, 1066, 823, 1913, 2308, 1842, 910, 794, 1098, 1557, 1417,
    1851, 1876, 1010, 160, 1803, 1607, 1185, 1347, 1700, 981, 1191, 1058,
    1464, 1513, 1333, 1169, 1294, 978, 962, 1254, 987, 1290, 758, 436,
    579, 636, 614, 906, 982, 649, 564, 502, 274, 473, 506, 902, 639, 810,
    398, 488
  ),
  Production = c(
    0, 198, 1436, 1055, 1396, 1330, 1460, 1628, 1513, 1673, 1737, 1274,
    1726, 1591, 2094, 1411, 2009, 1909, 1759, 1693, 1748, 1455, 2078, 1717,
    1737, 1886, 862, 1382, 1779, 1423, 1460, 1454, 1347, 1409, 1203, 1235,
    1397, 1563, 1411, 1455, 1706, 688, 1446, 1336, 1618, 1404, 1759, 746,
    1560, 1665, 1317, 0, 441, 1390, 1392, 1180, 1477, 1265, 1485, 1495,
    1543, 1584, 1575, 1609, 1233, 1420, 908, 1008, 1586, 1392, 1385, 1259,
    1010, 973, 1053, 905, 1101, 1196, 891, 1033, 925, 889, 1136, 1058,
    1179, 1047, 967, 900, 904, 986, 1014, 945, 1030, 1066, 1191, 1143,
    1292, 574, 1174, 515, 1296, 1315, 1241, 0, 0, 1182, 1052, 1107, 1207,
    1254, 1055, 258, 1471, 1344, 1353, 1265, 1444, 791, 1397, 1186, 1264,
    1032, 949, 1059, 954, 798, 956, 1074, 1136, 1209, 975, 833, 994, 1127,
    1153, 1202, 1234, 1336, 1484, 1515, 1151, 1175, 976, 1135, 1272, 869,
    1900, 1173
  ),
  Net.Production.Qty = c(
    22, 188, 1428, 1031, 1382, 1368, 1456, 1578, 1463, 1583, 1699, 1318,
    1582, 1537, 2118, 1567, 1961, 1897, 1767, 1603, 1666, 1419, 2186, 1621,
    1677, 1840, 698, 1290, 1411, 927, 1754, 1222, 1411, 1549, 1491, 1359,
    1179, 1945, 1463, 1465, 1764, 764, 810, 1308, 1830, 1542, 1695, 544,
    1482, 1673, 1659, 0, 445, 1358, 1364, 1224, 1417, 1239, 1387, 1595,
    1469, 1624, 1643, 1763, 1217, 1456, 568, 1290, 1666, 1428, 1327, 773,
    1118, 1231, 1143, 921, 1083, 1124, 935, 903, 937, 849, 1132, 1032,
    1143, 1081, 891, 886, 880, 1002, 1072, 969, 1000, 996, 1243, 1183,
    1306, 650, 1226, 553, 1306, 1379, 1359, 0, 0, 1182, 988, 1099, 1173,
    1244, 1039, 254, 1425, 1318, 1385, 1221, 1364, 739, 1397, 1112, 1160,
    924, 971, 1015, 978, 828, 868, 994, 1090, 1165, 783, 887, 934, 1023,
    1045, 1114, 1052, 1186, 1456, 1401, 1249, 779, 430, 1625, 1498, 883,
    1860, 1101
  ),
  isoweek = c(
    "2019-W01-1", "2019-W02-1", "2019-W03-1", "2019-W04-1", "2019-W05-1",
    "2019-W06-1", "2019-W07-1", "2019-W08-1", "2019-W09-1", "2019-W10-1",
    "2019-W11-1", "2019-W12-1", "2019-W13-1", "2019-W14-1", "2019-W15-1",
    "2019-W16-1", "2019-W17-1", "2019-W18-1", "2019-W19-1", "2019-W20-1",
    "2019-W21-1", "2019-W22-1", "2019-W23-1", "2019-W24-1", "2019-W25-1",
    "2019-W26-1", "2019-W27-1", "2019-W28-1", "2019-W29-1", "2019-W30-1",
    "2019-W31-1", "2019-W32-1", "2019-W33-1", "2019-W34-1", "2019-W35-1",
    "2019-W36-1", "2019-W37-1", "2019-W38-1", "2019-W39-1", "2019-W40-1",
    "2019-W41-1", "2019-W42-1", "2019-W43-1", "2019-W44-1", "2019-W45-1",
    "2019-W46-1", "2019-W47-1", "2019-W48-1", "2019-W49-1", "2019-W50-1",
    "2019-W51-1", "2019-W52-1", "2020-W01-1", "2020-W02-1", "2020-W03-1",
    "2020-W04-1", "2020-W05-1", "2020-W06-1", "2020-W07-1", "2020-W08-1",
    "2020-W09-1", "2020-W10-1", "2020-W11-1", "2020-W12-1", "2020-W13-1",
    "2020-W14-1", "2020-W15-1", "2020-W16-1", "2020-W17-1", "2020-W18-1",
    "2020-W19-1", "2020-W20-1", "2020-W21-1", "2020-W22-1", "2020-W23-1",
    "2020-W24-1", "2020-W25-1", "2020-W26-1", "2020-W27-1", "2020-W28-1",
    "2020-W29-1", "2020-W30-1", "2020-W31-1", "2020-W32-1", "2020-W33-1",
    "2020-W34-1", "2020-W35-1", "2020-W36-1", "2020-W37-1", "2020-W38-1",
    "2020-W39-1", "2020-W40-1", "2020-W41-1", "2020-W42-1", "2020-W43-1",
    "2020-W44-1", "2020-W45-1", "2020-W46-1", "2020-W47-1", "2020-W48-1",
    "2020-W49-1", "2020-W50-1", "2020-W51-1", "2020-W52-1", "2020-W53-1",
    "2021-W01-1", "2021-W02-1", "2021-W03-1", "2021-W04-1", "2021-W05-1",
    "2021-W06-1", "2021-W07-1", "2021-W08-1", "2021-W09-1", "2021-W10-1",
    "2021-W11-1", "2021-W12-1", "2021-W13-1", "2021-W14-1", "2021-W15-1",
    "2021-W16-1", "2021-W17-1", "2021-W18-1", "2021-W19-1", "2021-W20-1",
    "2021-W21-1", "2021-W22-1", "2021-W23-1", "2021-W24-1", "2021-W25-1",
    "2021-W26-1", "2021-W27-1", "2021-W28-1", "2021-W29-1", "2021-W30-1",
    "2021-W31-1", "2021-W32-1", "2021-W33-1", "2021-W34-1", "2021-W35-1",
    "2021-W36-1", "2021-W37-1", "2021-W38-1", "2021-W39-1", "2021-W40-1",
    "2021-W41-1", "2021-W42-1", "2021-W43-1"
  ),
  date = structure(c(
    17896, 17903, 17910, 17917, 17924, 17931, 17938, 17945, 17952, 17959,
    17966, 17973, 17980, 17987, 17994, 18001, 18008, 18015, 18022, 18029,
    18036, 18043, 18050, 18057, 18064, 18071, 18078, 18085, 18092, 18099,
    18106, 18113, 18120, 18127, 18134, 18141, 18148, 18155, 18162, 18169,
    18176, 18183, 18190, 18197, 18204, 18211, 18218, 18225, 18232, 18239,
    18246, 18253, 18260, 18267, 18274, 18281, 18288, 18295, 18302, 18309,
    18316, 18323, 18330, 18337, 18344, 18351, 18358, 18365, 18372, 18379,
    18386, 18393, 18400, 18407, 18414, 18421, 18428, 18435, 18442, 18449,
    18456, 18463, 18470, 18477, 18484, 18491, 18498, 18505, 18512, 18519,
    18526, 18533, 18540, 18547, 18554, 18561, 18568, 18575, 18582, 18589,
    18596, 18603, 18610, 18617, 18624, 18631, 18638, 18645, 18652, 18659,
    18666, 18673, 18680, 18687, 18694, 18701, 18708, 18715, 18722, 18729,
    18736, 18743, 18750, 18757, 18764, 18771, 18778, 18785, 18792, 18799,
    18806, 18813, 18820, 18827, 18834, 18841, 18848, 18855, 18862, 18869,
    18876, 18883, 18890, 18897, 18904, 18911, 18918, 18925
  ), class = "Date")
), row.names = c(NA, 148L), class = "data.frame")

# Converting the df to accommodate leap years (53-week ISO years) for weekly
# observations: "201901" -> "2019-W01-1" -> Date of that ISO week's Monday.
# Backslashes must be doubled inside R string literals.
Original.df <- Original.df %>%
  mutate(
    isoweek = stringr::str_replace(
      YearWeek, "^(\\d{4})(\\d{2})$", "\\1-W\\2-1"
    ),
    date = ISOweek::ISOweek2date(isoweek)
  )

# Creating test and train data.
Original.train.df <- Original.df %>%
  filter(date >= as.Date("2018-12-31"), date <= as.Date("2021-03-29"))

Original.test.df <- Original.df %>%
  filter(date >= as.Date("2021-04-05"), date <= as.Date("2021-10-25"))

# Splitting the original train data to contain only Week, Dependent and
# Independent variables.
Total.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  select(-YearWeek, -Production, -date, -isoweek) %>%
  as_tsibble(index = Week.1)

# Fitting forecast models (ARIMA with Fourier terms) to Net.Production.Qty.
# Each iteration fits one model and appends it as a new column "arima_<K>"
# of the combined mable.
fit_all_models.Prod.1 <- list()
for (K in seq_len(25)) {
  fit.Prod.1 <- Total.train.df %>%
    model(
      ARIMA(
        Net.Production.Qty ~ fourier(K = K),
        stepwise = FALSE,
        approximation = FALSE
      )
    )
  names(fit.Prod.1) <- paste0("arima_", K)
  fit_all_models.Prod.1 <- bind_cols(fit_all_models.Prod.1, fit.Prod.1)
}

glance(fit_all_models.Prod.1) %>%
  arrange(AICc) %>%
  select(.model:BIC)

# Name of the model column with the lowest AICc. slice_min() skips NA values
# and with_ties = FALSE guarantees exactly one name.
best_model.Prod.1 <- glance(fit_all_models.Prod.1) %>%
  slice_min(AICc, n = 1, with_ties = FALSE) %>%
  pull(.model)

# Forecasting Net.Production.Qty for 30 steps using the best model - Model.1
Forecast.Net.Prod.1 <- fit_all_models.Prod.1 %>%
  select(all_of(best_model.Prod.1)) %>%
  forecast(h = 30)

# Extracting fitted values from the model which has min AICc.
# NOTE: after the loop `fit.Prod.1` holds only the last model (K = 25) and a
# mable has no AICc column, so filtering it on AICc cannot work. Pick the best
# model's column from the combined mable instead, then call fitted().
fitted.Prod.1 <- fit_all_models.Prod.1 %>%
  select(all_of(best_model.Prod.1)) %>%
  fitted()
Как видно из последнего шага выше, я пытаюсь извлечь подогнанные значения (fitted values) из модели с минимальным AICc, однако это не работает.
Буду очень признателен, если кто-нибудь поможет мне получить подогнанные значения из приведённой выше модели с минимальным AICc.
Спасибо
Комментарии:
1. Поскольку это даёт вам модель с наименьшими подогнанными значениями, не могли бы вы просто выполнить последнюю строку кода с этой моделью и использовать что-то вроде функции `predict()`? Например, создайте столбец с именем «fit» и извлеките в него подогнанные (или прогнозируемые) значения из выбранной модели: `dataframe$fit <- predict(model)`. Конечно, модель нужно будет запустить отдельно — последним шагом перед извлечением подогнанных значений.
2. @Andy Я хочу увидеть подогнанные значения на обучающем наборе данных, а не прогнозируемые. Если я применяю функцию predict или forecast к обучающему набору данных, она выдаёт прогнозные значения. Похоже, мне нужно либо найти наилучшую модель вручную и получить подогнанные значения из неё, либо использовать условие if, чтобы сохранить наилучшую модель по минимальному AICc, и уже из неё получить подогнанные значения.
3. Не подключайте все эти пакеты: для воспроизводимости нужны только fpp3 и ISOweek.
Ответ №1:
Вы почти у цели:
# your code .....

# Get the fitted values of the model whose name is stored in
# best_model.Prod.1: select that model's column from the combined mable,
# then call fitted() on it.
fitted.Prod.1 <- fit_all_models.Prod.1 %>%
  select(all_of(best_model.Prod.1)) %>%
  fitted()

fitted.Prod.1
#> # A tsibble: 118 x 3 [1W]
#> # Key:       .model [1]
#>    .model     Week.1 .fitted
#>    <chr>      <week>   <dbl>
#>  1 arima_13 2019 W01    21.0
#>  2 arima_13 2019 W02   486.
#>  3 arima_13 2019 W03  1007.
#>  4 arima_13 2019 W04   965.
#>  5 arima_13 2019 W05  1012.
#>  6 arima_13 2019 W06  1088.
#>  7 arima_13 2019 W07  1175.
#>  8 arima_13 2019 W08  1166.
#>  9 arima_13 2019 W09  1305.
#> 10 arima_13 2019 W10  1613.
#> # ... with 108 more rows