mlr-org/mlrMBO

Final Answer from mlrMBO outside of the specified variable ranges (multi objective function)

swaheera opened this issue · 2 comments

I am working with R, and I am trying to perform multi-objective constrained Bayesian optimization using the "mlrMBO" library (https://cran.r-project.org/web/packages/mlrMBO/mlrMBO.pdf).

I wrote the following code to optimize some arbitrary function I created:

    library(mlrMBO)
    library(dplyr)
    library(ParamHelpers)
    
    a1 = rnorm(1000,100,10)
        b1 = rnorm(1000,100,9)
        c1 = sample.int(1000, 1000, replace = TRUE)
        train_data = data.frame(a1,b1,c1)
    
    
        obj.fun = makeMultiObjectiveFunction(
        name = "Some function",
        fn = function(x) {
        #bin data according to random criteria
        train_data <- train_data %>%
            mutate(cat = ifelse(a1 <= x[1] & b1 <= x[3], "a",
                                ifelse(a1 <= x[2] & b1 <= x[4], "b", "c")))
       
        train_data$cat = as.factor(train_data$cat)
       
        #new splits
        a_table = train_data %>%
            filter(cat == "a") %>%
            select(a1, b1, c1, cat)
       
        b_table = train_data %>%
            filter(cat == "b") %>%
            select(a1, b1, c1, cat)
       
        c_table = train_data %>%
            filter(cat == "c") %>%
            select(a1, b1, c1, cat)
       
       
        #calculate  quantile ("quant") for each bin
       
        table_a = data.frame(a_table%>% group_by(cat) %>%
                                 mutate(quant = ifelse(c1 > 150,1,0 )))
       
        table_b = data.frame(b_table%>% group_by(cat) %>%
                                 mutate(quant = ifelse(c1 > 300,1,0 )))
       
        table_c = data.frame(c_table%>% group_by(cat) %>%
                                 mutate(quant = ifelse(c1 > 400,1,0 )))
       
        f1 = mean(table_a$quant)
        f2 = mean(table_b$quant)
        f3 = mean(table_c$quant)
       
       
        #group all tables
       
        final_table = rbind(table_a, table_b, table_c)
        # calculate the total mean : this is what needs to be optimized
       
        f4 = mean(final_table$quant)
       
       
        return (f1, f2, f3, f4);
    },
        par.set = makeParamSet(
                  makeNumericParam("x[1]", lower = 80, upper = 90),
                  makeNumericParam("x[2]", lower = 95, upper = 110),
                  makeNumericParam("x[3]", lower = 80, upper = 90),
                  makeNumericParam("x[4]", lower = 95, upper = 110),
  forbidden = expression(x[2] >x[1] & x[4] >x[3])
    ),
         minimize = TRUE
    )
    
    ctrl = makeMBOControl()
    ctrl = setMBOControlTermination(ctrl, iters = 20L)
    
    # we can basically do an exhaustive search in 3 values
    ctrl = setMBOControlInfill(ctrl, crit = makeMBOInfillCritEI())
      # opt.restarts = 1L, opt.focussearch.points = 3L, opt.focussearch.maxit = 1L)
    
    #design = generateDesign(20L, getParamSet(obj.fun), fun = lhs::maximinLHS)
    
    lrn = makeMBOLearner(ctrl, obj.fun)
    
    res = mbo(obj.fun, design = NULL, learner = lrn, control = ctrl, show.info = TRUE)

But when you look at the final result:

    Solution Fitness Value: 1.762747e+02
    
    Parameters at the Solution (parameter, gradient):
    
     X[ 1] :	1.994399e+01	G[ 1] :	7.832140e-01
     X[ 2] :	1.182418e+01	G[ 2] :	7.563822e-03
     X[ 3] :	1.997264e+01	G[ 3] :	6.824901e-01
     X[ 4] :	7.681157e+00	G[ 4] :	2.370936e-02
     X[ 5] :	7.515392e-05	G[ 5] :	3.824832e-02

These seem to be outside the specified ranges:

par.set = makeParamSet(
               makeNumericParam("x[1]", lower = 80, upper = 90),
               makeNumericParam("x[2]", lower = 95, upper = 110),
               makeNumericParam("x[3]", lower = 80, upper = 90),
               makeNumericParam("x[4]", lower = 95, upper = 110)

Am I doing something wrong?

Thanks

can you change your example so it does not use x[1] as param ids but x1 etc instead.

I repaced [x1], x[2], x[3], x[4] with x1, x2, x3, x4:

library(mlrMBO)
library(dplyr)
library(ParamHelpers)

a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,9)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)


obj.fun = makeMultiObjectiveFunction(
    name = "Some function",
    fn = function(x1,x2,x3,x4) {
        #bin data according to random criteria
        train_data <- train_data %>%
            mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
                                ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
        
        train_data$cat = as.factor(train_data$cat)
        
        #new splits
        a_table = train_data %>%
            filter(cat == "a") %>%
            select(a1, b1, c1, cat)
        
        b_table = train_data %>%
            filter(cat == "b") %>%
            select(a1, b1, c1, cat)
        
        c_table = train_data %>%
            filter(cat == "c") %>%
            select(a1, b1, c1, cat)
        
        
        #calculate  quantile ("quant") for each bin
        
        table_a = data.frame(a_table%>% group_by(cat) %>%
                                 mutate(quant = ifelse(c1 > 150,1,0 )))
        
        table_b = data.frame(b_table%>% group_by(cat) %>%
                                 mutate(quant = ifelse(c1 > 300,1,0 )))
        
        table_c = data.frame(c_table%>% group_by(cat) %>%
                                 mutate(quant = ifelse(c1 > 400,1,0 )))
        
        f1 = mean(table_a$quant)
        f2 = mean(table_b$quant)
        f3 = mean(table_c$quant)
        
        
        #group all tables
        
        final_table = rbind(table_a, table_b, table_c)
        # calculate the total mean : this is what needs to be optimized
        
        f4 = mean(final_table$quant)
        
        
        return (f1, f2, f3, f4);
    },
    par.set = makeParamSet(
        makeNumericParam("x1", lower = 80, upper = 90),
        makeNumericParam("x2", lower = 95, upper = 110),
        makeNumericParam("x3", lower = 80, upper = 90),
        makeNumericParam("x4", lower = 95, upper = 110),
        forbidden = expression(x2 >x1 & x4 >x3)
    ),
    minimize = TRUE
)

ctrl = makeMBOControl()
ctrl = setMBOControlTermination(ctrl, iters = 20L)

# we can basically do an exhaustive search in 3 values
ctrl = setMBOControlInfill(ctrl, crit = makeMBOInfillCritEI())
# opt.restarts = 1L, opt.focussearch.points = 3L, opt.focussearch.maxit = 1L)

#design = generateDesign(20L, getParamSet(obj.fun), fun = lhs::maximinLHS)

lrn = makeMBOLearner(ctrl, obj.fun)

res = mbo(obj.fun, design = NULL, learner = lrn, control = ctrl, show.info = TRUE)

But this code has been running for the past few hours ... Am I doing something wrong?
Thank you so much!