## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  comment   = "#>",
  eval      = TRUE
)

## ----data---------------------------------------------------------------------
library(smfa)

N <- 500; set.seed(12345)
z1 <- rnorm(N); z2 <- rnorm(N)
v1 <- rnorm(N); v2 <- rnorm(N)
g  <- rnorm(N)
e1 <- v1
e2 <- 0.7071 * (v1 + v2)
ds <- z1 + z2 + e1
d  <- ifelse(ds > 0, 1, 0)        # 1 = selected into the sample
group <- ifelse(g > 0, 1, 0)      # two technology groups
u  <- abs(rnorm(N))
x1 <- abs(rnorm(N)); x2 <- abs(rnorm(N))
y  <- abs(x1 + x2 + e2 - u)
dat <- as.data.frame(cbind(y = y, x1 = x1, x2 = x2,
                            z1 = z1, z2 = z2, d = d, group = group))

# About 50% of observations are selected
table(dat$d)
#>    0    1
#> 1013  987

## ----lp-----------------------------------------------------------------------
meta_sel_lp <- smfa(
  formula    = log(y) ~ log(x1) + log(x2),
  selectionF = d ~ z1 + z2,      # selection equation: d is the binary indicator
  data       = dat,
  group      = "group",
  S          = 1L,
  udist      = "hnormal",
  groupType  = "sfaselectioncross",
  modelType  = "greene10",        # Greene (2010) two-step ML correction
  lType      = "kronrod",         # integration method for the selection likelihood
  Nsub       = 20,               # number of sub-intervals for numerical integration
  uBound     = Inf,
  method     = "bfgs",
  itermax    = 2000,
  metaMethod = "lp"
)
summary(meta_sel_lp)

## ----qp-----------------------------------------------------------------------
meta_sel_qp <- smfa(
  formula    = log(y) ~ log(x1) + log(x2),
  selectionF = d ~ z1 + z2,
  data       = dat,
  group      = "group",
  S          = 1L,
  udist      = "hnormal",
  groupType  = "sfaselectioncross",
  modelType  = "greene10",
  lType      = "kronrod",
  Nsub       = 20,
  uBound     = Inf,
  method     = "bfgs",
  itermax    = 2000,
  metaMethod = "qp"
)
summary(meta_sel_qp)

## ----huang--------------------------------------------------------------------
meta_sel_huang <- smfa(
  formula     = log(y) ~ log(x1) + log(x2),
  selectionF  = d ~ z1 + z2,
  data        = dat,
  group       = "group",
  S           = 1L,
  udist       = "hnormal",
  groupType   = "sfaselectioncross",
  modelType   = "greene10",
  lType       = "kronrod",
  Nsub        = 100,
  uBound      = Inf,
  method      = "bfgs",
  itermax     = 2000,
  metaMethod  = "sfa",
  sfaApproach = "huang"
)
summary(meta_sel_huang)

## ----odonnell-----------------------------------------------------------------
meta_sel_odonnell <- smfa(
  formula     = log(y) ~ log(x1) + log(x2),
  selectionF  = d ~ z1 + z2,
  data        = dat,
  group       = "group",
  S           = 1L,
  udist       = "hnormal",
  groupType   = "sfaselectioncross",
  modelType   = "greene10",
  lType       = "kronrod",
  Nsub        = 100,
  uBound      = Inf,
  method      = "bfgs",
  itermax     = 2000,
  metaMethod  = "sfa",
  sfaApproach = "ordonnell"
)
summary(meta_sel_odonnell)

## ----rho----------------------------------------------------------------------
# The rho parameter appears in the summary output:
# ----------------------------------------------------------------
#              Selection bias parameter
# ----------------------------------------------------------------
#           Coefficient Std. Error z value  Pr(>|z|)
# rho          0.89550    0.28696  3.1207  0.001804 **

# A significant rho indicates selection bias IS present and the
# correction is important.

## ----eff----------------------------------------------------------------------
eff_sel <- efficiencies(meta_sel_lp)

# Non-selected observations have NA efficiencies
sum(is.na(eff_sel$TE_group_BC))   # count of non-selected obs

# Subset for selected observations in group 1
sel_grp1 <- eff_sel[eff_sel$group == 1 & !is.na(eff_sel$TE_group_BC), ]
summary(sel_grp1[, c("TE_group_BC", "TE_meta_BC", "MTR_BC")])

