Как в R найти 10 лучших клиентов в каждом городе?

#r #user-defined-functions

Вопрос:

Мне нужно создать пользовательскую функцию, в которой пользователь может указать продукт (Gold/Silver/Platinum) и период (ежегодно или ежемесячно). Функция должна агрегировать данные соответственно по годам или по месяцам: например, если пользователь указывает месяц, то 10 лучших клиентов в каждом городе определяются по данным, агрегированным помесячно.

То, что я пробовал до сих пор, не выполняется, и я не могу понять, что делать дальше.

Что я сделал :

 # Attempted helper from the question, kept exactly as posted (it does not run).
 # Intended use: pick a product ("gold"/"silver"/"platinum") and a period
 # ("monthly"/"yearly") and print the top customers by summed Amount.
 # NOTE(review): the `if` conditions use `=` where the equality test `==` is
 # needed, and `amp;` looks like residue of an HTML-escaped `&` -- confirm.
 # NOTE(review): every branch groups by the literal product = "Gold", which
 # adds a constant column rather than selecting rows by the Product column.
 udf <- function(product,time_period){
  if(product="gold" amp; time_period="monthly")
    print(dataset%>%group_by(product="Gold",City,Customer,mnth)%>%summarise(repayment=sum(Amount))%>%top_n(10))
  
  else if(product="gold" amp; time_period="yearly")
  print(dataset%>%group_by(product="Gold",City,Customer,year)%>%summarise(repayment=sum(Amount))%>%top_n(10))
  
  else if(product="silver" amp; time_period="monthly")
  print(dataset%>%group_by(product="Gold",City,Customer,mnth)%>%summarise(repayment=sum(Amount))%>%top_n(10))
  
  else if(product="silver" amp; time_period="yearly")
  print(dataset%>%group_by(product="Gold",City,Customer,year)%>%summarise(repayment=sum(Amount))%>%top_n(10))
  
  else if(product="platinum" amp; time_period="monthly")
  print(dataset%>%group_by(product="Gold",City,Customer,mnth)%>%summarise(repayment=sum(Amount))%>%top_n(10))
  
  else
  print(dataset%>%group_by(product="Gold",City,Customer,year)%>%summarise(repayment=sum(Amount))%>%top_n(10))
  
}
 

Набор данных :

 # Sample data (dput output): 50 transactions with customer, city, product,
 # transaction date (Month), Amount and the derived `year` / `mnth` columns.
 # Bound to `dataset`, the global name the `udf` function reads.
 dataset <- structure(list(No = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), Customer = c("A1", "A1", "A1", 
"A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", 
"A1", "A1", "A1", "A1", "A2", "A2", "A2", "A2", "A2", "A2", "A2", 
"A2", "A2", "A2", "A2", "A3", "A3", "A3", "A3", "A3", "A3", "A3", 
"A3", "A3", "A3", "A3", "A4", "A4", "A4", "A4", "A4", "A4", "A4", 
"A4", "A4", "A4"), Age = c(76, 76, 76, 76, 76, 76, 76, 76, 76, 
76, 76, 76, 76, 76, 76, 76, 76, 76, 71, 71, 71, 71, 71, 71, 71, 
71, 71, 71, 71, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 47, 
47, 47, 47, 47, 47, 47, 47, 47, 47), City = c("BANGALORE", "BANGALORE", 
"BANGALORE", "BANGALORE", "BANGALORE", "BANGALORE", "BANGALORE", 
"BANGALORE", "BANGALORE", "BANGALORE", "BANGALORE", "BANGALORE", 
"BANGALORE", "BANGALORE", "BANGALORE", "BANGALORE", "BANGALORE", 
"BANGALORE", "CALCUTTA", "CALCUTTA", "CALCUTTA", "CALCUTTA", 
"CALCUTTA", "CALCUTTA", "CALCUTTA", "CALCUTTA", "CALCUTTA", "CALCUTTA", 
"CALCUTTA", "COCHIN", "COCHIN", "COCHIN", "COCHIN", "COCHIN", 
"COCHIN", "COCHIN", "COCHIN", "COCHIN", "COCHIN", "COCHIN", "BOMBAY", 
"BOMBAY", "BOMBAY", "BOMBAY", "BOMBAY", "BOMBAY", "BOMBAY", "BOMBAY", 
"BOMBAY", "BOMBAY"), Product = c("Gold", "Gold", "Gold", "Gold", 
"Gold", "Gold", "Gold", "Gold", "Gold", "Gold", "Gold", "Gold", 
"Gold", "Gold", "Gold", "Gold", "Gold", "Gold", "Silver", "Silver", 
"Silver", "Silver", "Silver", "Silver", "Silver", "Silver", "Silver", 
"Silver", "Silver", "Platinum", "Platinum", "Platinum", "Platinum", 
"Platinum", "Platinum", "Platinum", "Platinum", "Platinum", "Platinum", 
"Platinum", "Platinum", "Platinum", "Platinum", "Platinum", "Platinum", 
"Platinum", "Platinum", "Platinum", "Platinum", "Platinum"), 
    Limit = c(500000L, 500000L, 500000L, 500000L, 500000L, 500000L, 
    500000L, 500000L, 500000L, 500000L, 500000L, 500000L, 500000L, 
    500000L, 500000L, 500000L, 500000L, 500000L, 100000L, 100000L, 
    100000L, 100000L, 100000L, 100000L, 100000L, 100000L, 100000L, 
    100000L, 100000L, 10000L, 10000L, 10000L, 10000L, 10000L, 
    10000L, 10000L, 10000L, 10000L, 10000L, 10000L, 10001L, 10001L, 
    10001L, 10001L, 10001L, 10001L, 10001L, 10001L, 10001L, 10001L
    ), Company = c("C1", "C1", "C1", "C1", "C1", "C1", "C1", 
    "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", 
    "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", 
    "C2", "C2", "C3", "C3", "C3", "C3", "C3", "C3", "C3", "C3", 
    "C3", "C3", "C3", "C4", "C4", "C4", "C4", "C4", "C4", "C4", 
    "C4", "C4", "C4"), Segment = c("Self Employed", "Self Employed", 
    "Self Employed", "Self Employed", "Self Employed", "Self Employed", 
    "Self Employed", "Self Employed", "Self Employed", "Self Employed", 
    "Self Employed", "Self Employed", "Self Employed", "Self Employed", 
    "Self Employed", "Self Employed", "Self Employed", "Self Employed", 
    "Salaried_MNC", "Salaried_MNC", "Salaried_MNC", "Salaried_MNC", 
    "Salaried_MNC", "Salaried_MNC", "Salaried_MNC", "Salaried_MNC", 
    "Salaried_MNC", "Salaried_MNC", "Salaried_MNC", "Salaried_Pvt", 
    "Salaried_Pvt", "Salaried_Pvt", "Salaried_Pvt", "Salaried_Pvt", 
    "Salaried_Pvt", "Salaried_Pvt", "Salaried_Pvt", "Salaried_Pvt", 
    "Salaried_Pvt", "Salaried_Pvt", "Govt", "Govt", "Govt", "Govt", 
    "Govt", "Govt", "Govt", "Govt", "Govt", "Govt"), SL.No. = c(NA, 
    2L, 3L, 4L, 5L, 6L, 7L, 103L, 117L, 131L, 145L, 159L, 173L, 
    187L, 201L, 215L, 229L, 243L, 104L, 118L, 132L, 146L, 160L, 
    174L, 188L, 202L, 216L, 230L, 244L, 105L, 119L, 133L, 147L, 
    161L, 175L, 189L, 203L, 217L, 231L, 245L, 106L, 120L, 134L, 
    148L, 162L, 176L, 190L, 204L, 218L, 232L), Month = structure(c(12429, 
    12420, 12432, 12442, 12800, 12837, 12815, 12429, 12453, 13257, 
    13109, 13473, 12967, 13394, 12837, 13239, 12432, 12830, 12420, 
    12842, 12514, 12745, 13109, 12998, 13424, 12817, 13240, 12442, 
    12837, 12432, 12830, 12900, 12927, 12745, 13029, 13455, 13183, 
    13241, 12429, 12817, 12442, 12837, 12543, 12928, 12927, 13059, 
    13485, 13232, 13242, 12420), class = "Date"), Amount = c(495414.75, 
    1e+05, 10000, 10001, 10002, 1e+05, 14473.41, 10000, 117964.43, 
    79849.19, 402099.78, 100003, 100001, 100003, 96670.7, 429099.97, 
    453027.64, 187398.64, 10001, 1e+05, 148038.67, 1e+05, 10274, 
    100002, 298423.86, 1e+05, 339899.24, 298200.38, 132970.25, 
    10002, 10000, 10002, 10000, 233364.46, 100003, 233945.84, 
    10000, 79705.12, 327273.46, 104406.96, 1e+05, 10001, 1e+05, 
    10001, 358341.19, 373214.8, 277825.13, 10001, 1e+05, 15557.49
    ), year = c(2004, 2004, 2004, 2004, 2005, 2005, 2005, 2004, 
    2004, 2006, 2005, 2006, 2005, 2006, 2005, 2006, 2004, 2005, 
    2004, 2005, 2004, 2004, 2005, 2005, 2006, 2005, 2006, 2004, 
    2005, 2004, 2005, 2005, 2005, 2004, 2005, 2006, 2006, 2006, 
    2004, 2005, 2004, 2005, 2004, 2005, 2005, 2005, 2006, 2006, 
    2006, 2004), mnth = c(1, 1, 1, 1, 1, 2, 2, 1, 2, 4, 11, 11, 
    7, 9, 2, 4, 1, 2, 1, 2, 4, 11, 11, 8, 10, 2, 4, 1, 2, 1, 
    2, 4, 5, 11, 9, 11, 2, 4, 1, 2, 1, 2, 5, 5, 5, 10, 12, 3, 
    4, 1)), row.names = c(NA, 50L), class = "data.frame")
 

Ответ №1:

Измените все одинарные = на двойные == , чтобы проверить равенство, а не присваивать значение.

 # Print the top 10 customers per city for one product, ranked by total
 # repayment (sum of Amount) within each month or each year.
 #
 # Args:
 #   product:     "gold", "silver" or "platinum" (case-insensitive).
 #   time_period: "monthly" (aggregate by `mnth`) or "yearly" (by `year`).
 #
 # Reads the global data frame `dataset` (columns Product, City, Customer,
 # Amount, year, mnth) and needs dplyr attached. Stops with an error on an
 # unknown product or period. Returns the printed table invisibly.
 udf <- function(product, time_period) {
  product_key <- match.arg(tolower(product), c("gold", "silver", "platinum"))
  period_key <- match.arg(tolower(time_period), c("monthly", "yearly"))
  # NOTE(review): `mnth` alone merges the same calendar month of different
  # years; add `year` to the grouping if that is not intended.
  period_col <- as.name(if (period_key == "monthly") "mnth" else "year")

  top_customers <- dataset %>%
    # Keep only the requested product; grouping by a constant such as
    # product = "Gold" would not drop any rows.
    filter(tolower(Product) == product_key) %>%
    group_by(City, !!period_col, Customer) %>%
    summarise(repayment = sum(Amount)) %>%
    ungroup() %>%
    # Rank customers inside each city and period, not inside each customer.
    group_by(City, !!period_col) %>%
    top_n(10, repayment) %>%
    ungroup()

  print(top_customers)
  invisible(top_customers)
}