R: Почему выполняется первый код, но второй код приводит к ошибке?

#r #algorithm #function #optimization

Вопрос:

Я работаю с R. пытаюсь использовать следующую библиотеку для оптимизации произвольной функции, которую я написал: https://cran.r-project.org/web/packages/nsga2R/nsga2R.pdf

Сначала я создал некоторые данные для этого примера:

 #load library
set.seed(123)
library(dplyr)
library(nsga2R)

#create data for this example
# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,10)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)
 

Затем я определил две следующие функции для оптимизации (funct_set и funct_set2)

Вот первая функция:

 #define first function

funct_set <- function (x) {
x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]; x5 <- x[5]; x6 <- x[6]; x[7] <- x[7]
f <- numeric(4)


#bin data according to random criteria
train_data <- train_data %>%
    mutate(cat = ifelse(a1 <= x1 amp; b1 <= x3, "a",
                        ifelse(a1 <= x2 amp; b1 <= x4, "b", "c")))

train_data$cat = as.factor(train_data$cat)

#new splits
a_table = train_data %>%
    filter(cat == "a") %>%
    select(a1, b1, c1, cat)

b_table = train_data %>%
    filter(cat == "b") %>%
    select(a1, b1, c1, cat)

c_table = train_data %>%
    filter(cat == "c") %>%
    select(a1, b1, c1, cat)



#calculate  quantile ("quant") for each bin

table_a = data.frame(a_table%>% group_by(cat) %>%
                         mutate(quant = ifelse(c1 > x[5],1,0 )))

table_b = data.frame(b_table%>% group_by(cat) %>%
                         mutate(quant = ifelse(c1 > x[6],1,0 )))

table_c = data.frame(c_table%>% group_by(cat) %>%
                         mutate(quant = ifelse(c1 > x[7],1,0 )))

f[1] = mean(table_a$quant)
f[2] = mean(table_b$quant)
f[3] = mean(table_c$quant)


#group all tables

final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized

f[4] = mean(final_table$quant)

 #this is the only line of code that is different between the two functions
    return (f);
}
 

Вот вторая функция:

 #define second function

funct_set_2 <- function (x) {
    x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]; x5 <- x[5]; x6 <- x[6]; x[7] <- x[7]
    f <- numeric(4)


#bin data according to random criteria
train_data <- train_data %>%
    mutate(cat = ifelse(a1 <= x1 amp; b1 <= x3, "a",
                        ifelse(a1 <= x2 amp; b1 <= x4, "b", "c")))

train_data$cat = as.factor(train_data$cat)

#new splits
a_table = train_data %>%
    filter(cat == "a") %>%
    select(a1, b1, c1, cat)

b_table = train_data %>%
    filter(cat == "b") %>%
    select(a1, b1, c1, cat)

c_table = train_data %>%
    filter(cat == "c") %>%
    select(a1, b1, c1, cat)



#calculate  quantile ("quant") for each bin

table_a = data.frame(a_table%>% group_by(cat) %>%
                         mutate(quant = ifelse(c1 > x[5],1,0 )))

table_b = data.frame(b_table%>% group_by(cat) %>%
                         mutate(quant = ifelse(c1 > x[6],1,0 )))

table_c = data.frame(c_table%>% group_by(cat) %>%
                         mutate(quant = ifelse(c1 > x[7],1,0 )))

f[1] = mean(table_a$quant)
f[2] = mean(table_b$quant)
f[3] = mean(table_c$quant)


#group all tables

final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized

f[4] = mean(final_table$quant)

#here is the only line of code that is different
return(f[3:4])
}
 

Отсюда я попытался оптимизировать обе эти функции.

Первая оптимизация привела к ошибке:

 results_1 <- nsga2R(fn=funct_set, varNo=7, objDim=4, 
                   lowerBounds=c(80,80,80,80, 100, 200, 300), 
                   upperBounds=c(120,120,120,120,200,300,400),
                   popSize=50, tourSize=2, generations=50, 
                  cprob=0.9, XoverDistIdx=20, mprob=0.1,MuDistIdx=3)

********** R based Nondominated Sorting Genetic Algorithm II *********
initializing the population
ranking the initial population

Error in if (all(xi <= xj) amp;amp; any(xi < xj)) { : 
  missing value where TRUE/FALSE needed
 

Но второй код оптимизации, похоже, работает нормально:

 results_2 <- nsga2R(fn=funct_set_2, varNo=7, objDim=2, 
                   lowerBounds=c(80,80,80,80, 100, 200, 300), 
                  upperBounds=c(120,120,120,120,200,300,400),
                   popSize=50, tourSize=2, generations=50, 
                cprob=0.9, XoverDistIdx=20, mprob=0.1,MuDistIdx=3)
 

Вопрос: Кто-нибудь знает, почему «results_1» выдает ошибку, но «results_2» не выдает ошибку? Это из-за того, как я настроил функцию или данные?

Спасибо

Ответ №1:

 A partial solution :


#define function

funct_set <- function (x) {
    x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]
    f <- numeric(4)
    
    
    #bin data according to random criteria
    train_data <- train_data %>%
        mutate(cat = ifelse(a1 <= x1 amp; b1 <= x3, "a",
                            ifelse(a1 <= x2 amp; b1 <= x4, "b", "c")))
    
    train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(a1, b1, c1, cat)
    
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(a1, b1, c1, cat)
    
    c_table = train_data %>%
        filter(cat == "c") %>%
        select(a1, b1, c1, cat)
    
    
    
    #calculate  quantile ("quant") for each bin
    
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 100,1,0 )))
    
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 300,1,0 )))
    
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 500,1,0 )))
    
    f[1] = mean(table_a$quant)
    f[2] = mean(table_b$quant)
    f[3] = mean(table_c$quant)
    
    
    #group all tables
    
    final_table = rbind(table_a, table_b, table_c)
    # calculate the total mean : this is what needs to be optimized
    
    f[4] = mean(final_table$quant)
    
    
    return (f[2:4]);
}


#optimization

results_2 <- nsga2R(fn=funct_set, varNo=4, objDim=3, 
                    lowerBounds=c(70,90,70,90), 
                    upperBounds=c(90,110,90,110),
                    popSize=50, tourSize=2, generations=50, 
                    cprob=0.9, XoverDistIdx=20, mprob=0.1,MuDistIdx=3)