## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 5,
  warning = FALSE,
  message = FALSE,
  fig.alt = "Example visualization produced by a bayesiansurpriser function."
)

## ----load-packages------------------------------------------------------------
library(bayesiansurpriser)
library(sf)
library(ggplot2)

## ----surprise-sf--------------------------------------------------------------
# Load NC SIDS data
nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)

# Compute surprise with default models (uniform, baserate, funnel)
result <- surprise(nc, observed = SID74, expected = BIR74)

# View structure
print(result)

## ----surprise-sf-plot---------------------------------------------------------
# Visualize the result
ggplot(result) +
  geom_sf(aes(fill = signed_surprise)) +
  scale_fill_surprise_diverging() +
  labs(
    title = "Bayesian Surprise: NC SIDS Cases (1974)",
    subtitle = "Blue = fewer than expected, Red = more than expected",
    fill = "Signed\nSurprise"
  ) +
  theme_minimal()

## ----auto-surprise------------------------------------------------------------
# Observed counts and expected values
observed <- c(50, 100, 150, 200, 75, 300, 25)
expected <- c(10000, 50000, 100000, 25000, 15000, 80000, 5000)

result_auto <- auto_surprise(observed, expected)

# View surprise values
cat("Surprise values:\n")
print(round(result_auto$surprise, 4))

cat("\nSigned surprise:\n")
print(round(result_auto$signed_surprise, 4))

## ----auto-surprise-plot-------------------------------------------------------
# Visualize
df <- data.frame(
  region = LETTERS[1:7],
  observed = observed,
  expected = expected,
  surprise = result_auto$surprise,
  signed = result_auto$signed_surprise
)

ggplot(df, aes(x = reorder(region, -surprise), y = surprise, fill = signed)) +
  geom_col() +
  scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
  labs(
    title = "Surprise by Region",
    x = "Region", y = "Surprise (bits)",
    fill = "Direction"
  ) +
  theme_minimal()

## ----compute-surprise---------------------------------------------------------
# Build a model space
mspace <- model_space(
  bs_model_uniform(),
  bs_model_baserate(nc$BIR74),
  bs_model_funnel(nc$BIR74)
)

# Compute surprise directly
result_low <- compute_surprise(
  model_space = mspace,
  observed = nc$SID74,
  expected = nc$BIR74,
  return_signed = TRUE
)

cat("Computed", length(result_low$surprise), "surprise values\n")
cat("Range:", round(range(result_low$surprise), 4), "\n")

## ----model-uniform------------------------------------------------------------
model_uniform <- bs_model_uniform()
print(model_uniform)

## ----model-baserate-----------------------------------------------------------
model_baserate <- bs_model_baserate(nc$BIR74)
print(model_baserate)

## ----model-gaussian-----------------------------------------------------------
# Fit from data (default)
model_gaussian_fit <- bs_model_gaussian()
print(model_gaussian_fit)

# Fixed parameters
model_gaussian_fixed <- bs_model_gaussian(mu = 100, sigma = 20, fit_from_data = FALSE)
print(model_gaussian_fixed)

## ----model-sampled------------------------------------------------------------
model_kde <- bs_model_sampled(sample_frac = 0.2, kernel = "gaussian")
print(model_kde)

## ----model-funnel-------------------------------------------------------------
model_funnel <- bs_model_funnel(nc$BIR74, type = "count")
print(model_funnel)

## ----model-bootstrap----------------------------------------------------------
model_boot <- bs_model_bootstrap(n_bootstrap = 100)
print(model_boot)

## ----model-mixture------------------------------------------------------------
# Define a two-component mixture
model_mix <- bs_model_gaussian_mixture(
  means = c(50, 150),
  sds = c(10, 20),
  weights = c(0.6, 0.4)
)
print(model_mix)

## ----model-space-create-------------------------------------------------------
space <- model_space(
  bs_model_uniform(),
  bs_model_baserate(nc$BIR74),
  bs_model_funnel(nc$BIR74),
  prior = c(0.2, 0.5, 0.3),
  names = c("Uniform", "Population", "Funnel")
)
print(space)

## ----default-space------------------------------------------------------------
default_space <- default_model_space(nc$BIR74)
print(default_space)

## ----model-space-ops----------------------------------------------------------
# Add a model
space_expanded <- add_model(space, bs_model_gaussian())
cat("After add_model:", n_models(space_expanded), "models\n")
cat("Model names:", model_names(space_expanded), "\n")

# Remove a model (by index)
space_reduced <- remove_model(space_expanded, 4)
cat("After remove_model:", n_models(space_reduced), "models\n")

# Set new prior
space_reweighted <- set_prior(space, c(0.1, 0.6, 0.3))
cat("New prior:", space_reweighted$prior, "\n")

# Get model names
cat("Model names:", model_names(space), "\n")

## ----bayesian-update----------------------------------------------------------
# Update model space with observed data
updated_space <- bayesian_update(space, nc$SID74)

cat("Prior:    ", round(space$prior, 4), "\n")
cat("Posterior:", round(updated_space$posterior, 4), "\n")

## ----bayesian-update-plot-----------------------------------------------------
# Visualize prior vs posterior
plot(updated_space)

## ----cumulative-update--------------------------------------------------------
space_simple <- model_space(bs_model_uniform(), bs_model_gaussian())
# Sequential observations (processed one at a time)
observations <- c(10, 15, 20, 25, 100, 30, 35, 40)

cum_result <- cumulative_bayesian_update(space_simple, observations)
cat("Observations processed:", length(cum_result$cumulative_surprise), "\n")
cat("Final posterior:", round(cum_result$final_space$posterior, 4), "\n")

## ----accessors----------------------------------------------------------------
result <- surprise(nc, observed = SID74, expected = BIR74)

# Get surprise values
surprise_vals <- get_surprise(result, "surprise")
signed_vals <- get_surprise(result, "signed")

cat("First 5 surprise values:", round(surprise_vals[1:5], 4), "\n")
cat("First 5 signed values:", round(signed_vals[1:5], 4), "\n")

# Get model space
mspace <- get_model_space(result)
print(mspace)

## ----temporal-----------------------------------------------------------------
# Create panel data
set.seed(42)
panel_data <- data.frame(
  year = rep(2018:2022, each = 4),
  region = rep(c("North", "South", "East", "West"), 5),
  cases = c(45, 80, 55, 70,   # 2018
            50, 85, 60, 65,   # 2019
            48, 90, 58, 72,   # 2020
            55, 95, 52, 68,   # 2021
            60, 100, 65, 75), # 2022
  population = rep(c(10000, 20000, 15000, 18000), 5)
)

temporal_result <- surprise_temporal(
  panel_data,
  time_col = year,
  observed = cases,
  expected = population,
  region_col = region
)

print(temporal_result)

## ----temporal-plot------------------------------------------------------------
# Plot temporal evolution
plot(temporal_result, type = "time_series")

## ----temporal-heatmap---------------------------------------------------------
# Heatmap view
plot(temporal_result, type = "heatmap")

## ----streaming----------------------------------------------------------------
# Initial computation
initial <- auto_surprise(c(50, 100, 75), c(10000, 20000, 15000))
cat("Initial surprise:", round(initial$surprise, 4), "\n")

# Stream new observations
updated <- update_surprise(initial,
                           new_observed = c(55, 110),
                           new_expected = c(11000, 22000))
cat("After update:", round(updated$surprise, 4), "\n")

## ----rolling------------------------------------------------------------------
# Time series data
ts_data <- c(50, 52, 48, 55, 51, 120, 115, 125, 60, 58, 62, 55)
expected_ts <- rep(1000, length(ts_data))

rolling_result <- surprise_rolling(
  ts_data,
  expected = expected_ts,
  window_size = 4,
  step = 1
)

# Extract mean surprise per window
window_means <- sapply(rolling_result$windows, function(w) mean(w$result$surprise))

plot(seq_along(window_means), window_means, type = "b",
     xlab = "Window Position", ylab = "Mean Surprise",
     main = "Rolling Window Surprise", pch = 19)
abline(h = mean(window_means), lty = 2, col = "red")

## ----time-slice---------------------------------------------------------------
# Get surprise for specific year
surprise_2020 <- get_surprise_at_time(temporal_result, 2020)
cat("2020 surprise values:", round(surprise_2020$surprise, 4), "\n")

## ----animate------------------------------------------------------------------
anim_data <- surprise_animate(temporal_result)
head(anim_data)

## ----funnel-data--------------------------------------------------------------
funnel_df <- compute_funnel_data(
  observed = nc$SID74,
  sample_size = nc$BIR74,
  type = "count",
  limits = c(2, 3)  # 2 and 3 SD limits
)

head(funnel_df)

## ----funnel-plot--------------------------------------------------------------
# Funnel plot - shows observed vs expected with control limits
ggplot(funnel_df, aes(x = sample_size, y = observed)) +
  geom_ribbon(aes(ymin = lower_3sd, ymax = upper_3sd), fill = "gray90") +
  geom_ribbon(aes(ymin = lower_2sd, ymax = upper_2sd), fill = "gray70") +
  geom_line(aes(y = expected), linetype = "dashed") +
  geom_point(aes(color = abs(z_score) > 2), size = 2) +
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
  labs(
    title = "Funnel Plot: NC SIDS Cases",
    x = "Births (Sample Size)",
    y = "Observed SIDS Cases",
    color = "Outside 2 SD"
  ) +
  theme_minimal()

## ----funnel-stats-------------------------------------------------------------
# Compute z-scores (need observed, expected, and sample_size)
# For counts, expected is derived from overall rate
overall_rate <- sum(nc$SID74) / sum(nc$BIR74)
expected_cases <- nc$BIR74 * overall_rate

z_scores <- funnel_zscore(nc$SID74, expected_cases, nc$BIR74, type = "count")
cat("First 5 z-scores:", round(z_scores[1:5], 3), "\n")

# Compute p-values from z-scores
p_vals <- funnel_pvalue(z_scores)
cat("First 5 p-values:", round(p_vals[1:5], 4), "\n")
cat("Significant at 0.05:", sum(p_vals < 0.05, na.rm = TRUE), "counties\n")

## ----normalize-prob-----------------------------------------------------------
raw <- c(10, 20, 30, 40)
normalized <- normalize_prob(raw)
cat("Raw:", raw, "\n")
cat("Normalized:", normalized, "(sum =", sum(normalized), ")\n")

## ----normalize-rate-----------------------------------------------------------
counts <- c(50, 100, 150)
population <- c(10000, 50000, 100000)
rates <- normalize_rate(counts, population, per = 100000)
cat("Rates per 100k:", round(rates, 1), "\n")

## ----normalize-zscore---------------------------------------------------------
values <- c(10, 20, 30, 40, 50)
z <- normalize_zscore(values)
cat("Z-scores:", round(z, 3), "\n")

## ----normalize-minmax---------------------------------------------------------
values <- c(10, 20, 30, 40, 50)
scaled <- normalize_minmax(values)
cat("Min-max scaled:", scaled, "\n")

## ----normalize-robust---------------------------------------------------------
values <- c(10, 20, 30, 40, 500)  # With outlier
robust <- normalize_robust(values)
cat("Robust scaled:", round(robust, 3), "\n")

## ----kl-divergence------------------------------------------------------------
p <- c(0.25, 0.25, 0.25, 0.25)  # Uniform
q <- c(0.1, 0.2, 0.3, 0.4)      # Non-uniform

kl <- kl_divergence(p, q)
cat("KL(P || Q):", round(kl, 4), "bits\n")

## ----log-sum-exp--------------------------------------------------------------
log_vals <- c(-1000, -1001, -1002)  # Very small numbers
result <- log_sum_exp(log_vals)
cat("log_sum_exp:", round(result, 4), "\n")

## ----scale-sequential---------------------------------------------------------
result <- surprise(nc, observed = SID74, expected = BIR74)

ggplot(result) +
  geom_sf(aes(fill = surprise)) +
  scale_fill_surprise(option = "viridis") +
  labs(title = "Sequential Scale (Viridis)") +
  theme_minimal()

## ----scale-diverging----------------------------------------------------------
ggplot(result) +
  geom_sf(aes(fill = signed_surprise)) +
  scale_fill_surprise_diverging() +
  labs(title = "Diverging Scale for Signed Surprise") +
  theme_minimal()

## ----scale-binned-------------------------------------------------------------
ggplot(result) +
  geom_sf(aes(fill = surprise)) +
  scale_fill_surprise_binned(n.breaks = 5) +
  labs(title = "Binned Scale (5 breaks)") +
  theme_minimal()

## ----scale-diverging-binned---------------------------------------------------
ggplot(result) +
  geom_sf(aes(fill = signed_surprise)) +
  scale_fill_surprise_diverging_binned(n.breaks = 7) +
  labs(title = "Diverging Binned Scale") +
  theme_minimal()

## ----stat-surprise------------------------------------------------------------
ggplot(nc) +
  stat_surprise(aes(geometry = geometry, observed = SID74, expected = BIR74)) +
  scale_fill_surprise_diverging() +
  labs(title = "stat_surprise() Computes Inline") +
  theme_minimal()

## ----geom-histogram-----------------------------------------------------------
result <- surprise(nc, observed = SID74, expected = BIR74)

ggplot(result, aes(x = surprise)) +
  geom_surprise_histogram(bins = 15) +
  labs(title = "Distribution of Surprise Values") +
  theme_minimal()

## ----geom-density-------------------------------------------------------------
ggplot(result, aes(x = signed_surprise)) +
  geom_surprise_density(fill = "#4575b4", alpha = 0.7) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(title = "Density of Signed Surprise") +
  theme_minimal()

## ----st-surprise--------------------------------------------------------------
result_sf <- st_surprise(nc, observed = SID74, expected = BIR74)
class(result_sf)

## ----st-density---------------------------------------------------------------
# st_density computes kernel density estimation for point patterns
# Returns a list with density grid
nc_centroids <- st_centroid(nc)
density_result <- st_density(nc_centroids, bandwidth = 0.5)

cat("Density result structure:\n")
cat("- x grid:", length(density_result$x), "values\n")
cat("- y grid:", length(density_result$y), "values\n")
cat("- z matrix:", dim(density_result$z)[1], "x", dim(density_result$z)[2], "\n")

## ----st-aggregate-------------------------------------------------------------
# Create larger regions to aggregate to
# Using a simple union of counties
nc_result <- surprise(nc, observed = SID74, expected = BIR74)

# Show surprise values from result
cat("Sample surprise values:\n")
print(head(nc_result[, c("NAME", "surprise", "signed_surprise")], 5))

## ----plot-sf-base, fig.height=5-----------------------------------------------
result <- surprise(nc, observed = SID74, expected = BIR74)
plot(result, which = "signed_surprise", main = "Base R Plot: Signed Surprise")

## ----plot-surprise-base-------------------------------------------------------
auto_result <- auto_surprise(c(50, 100, 150, 200, 75), c(10000, 50000, 100000, 25000, 15000))
plot(auto_result, type = "histogram", main = "Surprise Distribution")

## ----plot-model-space---------------------------------------------------------
space <- model_space(bs_model_uniform(), bs_model_baserate(nc$BIR74))
updated <- bayesian_update(space, nc$SID74)
plot(updated)

## ----plot-temporal-base-------------------------------------------------------
plot(temporal_result, type = "cumulative", main = "Cumulative Surprise Over Time")

## ----summary------------------------------------------------------------------
result <- surprise(nc, observed = SID74, expected = BIR74)
summary(result)

## ----data-canada--------------------------------------------------------------
data(canada_mischief, package = "bayesiansurpriser")
cat("Canada mischief dataset:\n")
cat("Rows:", nrow(canada_mischief), "\n")
cat("Columns:", paste(names(canada_mischief), collapse = ", "), "\n")

# Compute surprise on Canadian data
canada_result <- auto_surprise(
  canada_mischief$mischief_count,
  canada_mischief$population
)

cat("\nSurprise values by province:\n")
df_result <- data.frame(
  province = canada_mischief$name,
  surprise = round(canada_result$surprise, 4)
)
print(df_result[order(-df_result$surprise), ])

## ----data-counties------------------------------------------------------------
data(example_counties, package = "bayesiansurpriser")
cat("Example counties dataset:\n")
cat("Class:", class(example_counties)[1], "\n")
cat("Rows:", nrow(example_counties), "\n")
cat("Columns:", paste(names(example_counties), collapse = ", "), "\n")

## ----session------------------------------------------------------------------
sessionInfo()