Saving Treatment Plans

John Mount

2024-06-12

You can save and load treatment plans. Note: treatment plans are intended to be used with the version of vtreat they were constructed with (though we try to make plans forward-compatible). So it is a good idea to have procedures to re-build treatment plans.

The easiest way to save vtreat treatment plans is to use R’s built-in saveRDS function.

To save in a file:

library("vtreat")
# Small example training frame: a categorical column (x) and a numeric
# column (z), both containing missing values, plus a logical outcome (y).
dTrainC <- data.frame(
  x = c('a', 'a', 'a', 'b', 'b', NA, NA),
  z = c(1, 2, 3, 4, NA, 6, NA),
  y = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE)
)
# Build the treatment plan with 'y' as the outcome column and TRUE as the
# outcome target value.
treatmentsC <- designTreatmentsC(dTrainC, colnames(dTrainC),
                                 'y', TRUE,
                                 verbose = FALSE)

# Let tempfile() attach the extension itself: its "name not already in use"
# check then applies to the actual file name, not the extension-less stem.
fileName <- tempfile('vtreatPlan', fileext = '.RDS')
saveRDS(treatmentsC, file = fileName)
# Drop the in-memory plan so the restore step below demonstrably reads it
# back from disk.
rm(treatmentsC)

And then to restore and use the saved plan:

library("vtreat")
# Read the treatment plan back from the RDS file written earlier.
treatmentsC <- readRDS(file = fileName)

# Data to treat: x includes a level ('c') that does not occur in dTrainC,
# and both columns contain a missing value.
dTestC <- data.frame(
  x = c("a", "b", "c", NA),
  z = c(10, 20, 30, NA)
)
# c() evaluates to NULL, so pruneSig = NULL is the same argument value.
dTestCTreated <- prepare(treatmentsC, dTestC, pruneSig = NULL)

# clean up
unlink(fileName)

Treatment plans can also be stored as binary blobs in databases. Using ideas from here gives us the following through the DBI interface.

# con stays NULL when the optional database packages are unavailable; the
# read-back step checks for that before touching the database.
con <- NULL
if (requireNamespace('RSQLite', quietly = TRUE) &&
    requireNamespace('DBI', quietly = TRUE)) {
  library("RSQLite")
  con <- dbConnect(SQLite(), dbname = ":memory:")

  # Create the table: one row per plan, keyed by name, payload held as a blob.
  dbExecute(con, 'create table if not exists treatments 
                 (key varchar(200) primary key, 
                  treatment blob)')

  # Wrap the data: serialize() with a NULL connection returns a raw vector,
  # which is stored as a one-element list column so it binds as one value.
  df <- data.frame(
    key = "treatmentsC",
    treatment = I(list(serialize(treatmentsC, connection = NULL)))
  )

  # Clear any previous version stored under the same key.
  dbExecute(con, "delete from treatments where key='treatmentsC'")

  # Insert the treatment plan, binding the named parameters :key and
  # :treatment from the columns of df.
  # (The older dbGetPreparedQuery(con, <sql>, bind.data = df) form is
  # deprecated; dbExecute() with params is used instead.)
  dbExecute(
    con,
    "insert into treatments (key, treatment) values (:key, :treatment)",
    params = df
  )

  # Keep this paste() as the final expression so the status string is
  # auto-printed as the value of the if block.
  constr <- paste(capture.output(print(con)), collapse = "\n")
  paste("saved to db: ", constr)
}
## Warning: package 'RSQLite' was built under R version 4.3.2
## [1] "saved to db:  <SQLiteConnection>\n  Path: :memory:\n  Extensions: TRUE"
# Remove the in-memory plan and the earlier treated frame before the
# database read-back recreates them.
rm(treatmentsC, dTestCTreated)

And we can read the treatment back in as follows.

# Only runs when the database connection was opened in the save step.
if (!is.null(con)) {
  # Fetch the stored row(s) for this key and unserialize each blob back
  # into an R object.
  treatmentsList <- lapply(
    dbGetQuery(
      con,
      "select * from treatments where key='treatmentsC'"
    )[["treatment"]],
    unserialize
  )
  # Take the first (and, given the delete-then-insert above, expected only)
  # recovered plan.
  treatmentsC <- treatmentsList[[1]]
  # The plan is now in memory, so the connection can be closed.
  dbDisconnect(con)
  dTestCTreated <- prepare(treatmentsC, dTestC, pruneSig = NULL)
  print(dTestCTreated)
}
##       x_catP     x_catB    z z_isBAD x_lev_NA x_lev_x_a x_lev_x_b
## 1 0.42857143 -0.9807709 10.0       0        0         1         0
## 2 0.28571429 -0.2876737 20.0       0        0         0         1
## 3 0.07142857  0.0000000 30.0       0        0         0         0
## 4 0.28571429  9.6158638  3.2       1        1         0         0