## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# For reproducibility
set.seed(6006)

# Set the number of levels
n_levels <- 3000

# Randomly draw the number of observations per level
n_obs_lambda <- rgamma(n_levels, 3.1) * 100
n_obs_per_level <- rpois(n_levels, n_obs_lambda) + 1

# Simulate the mean probability per level
prob_per_level <- rbeta(n_levels, 3.1, 4.4)

# For each level, simulate data
d <- vector(mode = "list", length = n_levels)
for (l in seq_along(n_obs_per_level)){
	d[[l]] <- data.frame(
		y = rbinom(n_obs_per_level[l], 1, prob_per_level[l]),
		level = l)
}

# Combine into a data.frame
df <- do.call("rbind", d)
df$level <- as.factor(df$level)

## ----echo=FALSE, fig.width=7, fig.height=4, fig.align = 'center'--------------
n_obs_total <- sum(n_obs_per_level)
hist(n_obs_per_level, breaks = 80,
		 main = paste(format(n_levels, big.mark = ","),
		 						 "levels, total N =",
		 						 format(n_obs_total, big.mark = ",")),
		 xlab = "Number of observations by level")

## -----------------------------------------------------------------------------
library(glm4)

run_time <- system.time(
	fit <- glm4(y ~ level, data = df, family = binomial(), sparse = TRUE)
)

run_time

## -----------------------------------------------------------------------------
# Only print a subset of the output, restoring existing max.print after
old_options <- options(max.print = 20)
print(fit)
options(old_options)

## -----------------------------------------------------------------------------
# Model-implied probability for level 2
plogis(sum(coef(fit)[1:2]))

# Should equal the observed proportion
with(df, mean(y[level == 2]))

## -----------------------------------------------------------------------------
class(fit$glm4_fit)

## -----------------------------------------------------------------------------
summary(fit)

## -----------------------------------------------------------------------------
logLik(fit)

## -----------------------------------------------------------------------------
confint(fit)[1:20,]

## -----------------------------------------------------------------------------
vcov(fit)[1:5, 1:5]

## -----------------------------------------------------------------------------
set.seed(6006)

# Add covariate and fit
df$x <- rnorm(nrow(df))
fit_x <- glm4(y ~ x + level, data = df, family = binomial(), sparse = TRUE)

# glm4 anova method
anova(fit_x)

## -----------------------------------------------------------------------------
anova(fit_x, test = "Chisq")

## -----------------------------------------------------------------------------
anova(fit, fit_x, test = "Chisq")

## ----eval = FALSE-------------------------------------------------------------
# set.seed(6006)
# 
# n_levels <- 3000
# 
# n_obs_lambda    <- rgamma(n_levels, 3.1) * 100
# n_obs_per_level <- rpois(n_levels, n_obs_lambda) + 1
# prob_per_level  <- rbeta(n_levels, 3.1, 4.4)
# 
# d <- vector(mode = "list", length = n_levels)
# for (l in seq_along(n_obs_per_level)) {
# 	d[[l]] <- data.frame(
# 		y     = rbinom(n_obs_per_level[l], 1, prob_per_level[l]),
# 		level = l)
# }
# 
# df       <- do.call("rbind", d)
# df$level <- as.factor(df$level)

