rstudio/mleap

mleap_transform() fails when there are both numeric and character predictors

kevinykuo opened this issue · 0 comments

This errors because we're inadvertently coercing the entire row of pred_data to character.

# Reproducible example: score a single row that mixes numeric and character
# predictors against an MLeap bundle.
# NOTE: assumes sparklyr and mleap are attached and that `sc` is an open Spark
# connection -- neither is set up in this snippet.
diamonds_tbl <- sdf_copy_to(sc, ggplot2::diamonds) %>%
  dplyr::mutate(price = as.numeric(price))

# Index the three character columns, assemble all predictors into one
# "features" column, and fit a gradient-boosted-tree regressor on price.
pipeline <- ml_pipeline(sc) %>%
  ft_string_indexer("cut", "cut_cat") %>%
  ft_string_indexer("color", "color_cat") %>%
  ft_string_indexer("clarity", "clarity_cat") %>%
  ft_vector_assembler(
    c(
      "carat", "cut_cat", "color_cat", "clarity_cat",
      "depth", "table", "x", "y", "z"
    ),
    "features"
  ) %>%
  ml_gbt_regressor(label_col = "price", seed = 42)

pipeline_model <- pipeline %>%
  ml_fit(diamonds_tbl)

# Export the fitted pipeline to `model_path` before loading it back; without
# this step there is no bundle on disk for mleap_load_bundle() to read.
# The transformed table is used as the sample input, as in the mleap README.
model_path <- file.path(tempdir(), "diamonds_model.zip")
transformed_tbl <- ml_transform(pipeline_model, diamonds_tbl)
ml_write_bundle(
  pipeline_model,
  sample_input = transformed_tbl,
  path = model_path,
  overwrite = TRUE
)

mleap_model <- mleap_load_bundle(model_path)

# One-row input with both numeric (carat, depth, ...) and character
# (cut, color, clarity) columns; strings are kept as character, not factor.
pred_data <- data.frame(
  carat = 0.65, cut = "Good", color = "E",
  clarity = "VS1", depth = 60, table = 60,
  x = 4.5, y = 4.6, z = 2.7,
  stringsAsFactors = FALSE
)

# This is the call that fails for mixed numeric/character input.
mleap_transform(mleap_model, pred_data)