## ----setup_1, include = FALSE-------------------------------------------------
# Only evaluate vignette chunks when an explicit opt-in flag is set in the
# environment; otherwise the code is shown but not run.
local <- (Sys.getenv("BUILD_VIGNETTES") == "TRUE")

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = local,
  fig.width = 6,
  fig.height = 4
)

library(ncdfgeom)

# Probe for the optional nhdplusTools dependency with requireNamespace()
# rather than require(): it checks availability without attaching the package
# (every later call is nhdplusTools::-prefixed, so attaching is unnecessary).
# If it is missing, warn and disable evaluation for the rest of the vignette.
if (!requireNamespace("nhdplusTools", quietly = TRUE)) {
  warning("need nhdplusTools for this demo")
  knitr::opts_chunk$set(eval = FALSE)
}

# Save prior options so the cleanup chunk can restore them.
org_options <- options(scipen = 9999)

## -----------------------------------------------------------------------------

# Weights previously computed with the python `gdptools` package, shipped with
# ncdfgeom as example data. Rename the weight column so the two sources can be
# compared side by side later.
gdptools_weights <-
  system.file("extdata/gdptools_prl_out.csv", package = "ncdfgeom") |>
  read.csv(colClasses = c("character", "character", "numeric")) |>
  dplyr::rename(gdptools_wght = wght)

gage_id <- "USGS-01482100"

basin <- nhdplusTools::get_nldi_basin(
  list(featureSource = "nwissite", featureId = gage_id))

huc08 <- nhdplusTools::get_huc(
  id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")

huc12 <- nhdplusTools::get_huc(
  id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")

# Overview map: huc12 extent, basin outline (heavy), huc08 (medium),
# huc12 (light grey), with margins removed and restored afterwards.
org_par <- par(mar = c(0, 0, 0, 0))
plot(sf::st_as_sfc(sf::st_bbox(huc12)))
plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500),
     add = TRUE, lwd = 2)
plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500),
     add = TRUE, lwd = 0.2, border = "grey")
par(org_par)

# Equivalent weights from ncdfgeom: huc12 as source, huc08 as target,
# computed in an equal-area projection (EPSG:6931).
weights <- ncdfgeom::calculate_area_intersection_weights(
  x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
  y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
  normalize = TRUE
)

# NOTE: normalize = TRUE means these weights can not be used for "intensive" 
# weighted sums such as population per polygon. See below for more on this.

weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))


## -----------------------------------------------------------------------------

# Pairwise difference between the two weight sets.
weights$diff <- with(weights, w - gdptools_wght)

# make sure nothing is way out of whack
max(weights$diff, na.rm = TRUE)

# ensure the weights generally sum as we would expect.
sum(weights$gdptools_wght, na.rm = TRUE)
sum(weights$w, na.rm = TRUE)
length(unique(na.omit(weights$huc8)))

# see how many NA values we have in each.
sum(is.na(weights$w))
sum(is.na(weights$gdptools_wght))

# look at cases where gdptools has NA and ncdfgeom does not
subset(weights, is.na(gdptools_wght) & !is.na(w))


## -----------------------------------------------------------------------------

library(dplyr)
library(sf)
library(ncdfgeom)

# Template ring for a square of side 2 centered on the origin (closed ring:
# the first vertex is repeated at the end).
g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))

# sfg arithmetic: `*` scales coordinates and `+` translates them, building
# overlapping squares of differing sizes and positions.
blue1 = sf::st_polygon(g) * 0.8
blue2 = blue1 + c(1, 2)
blue3 = blue1 * 1.2 + c(-1, 2)

pink1 = sf::st_polygon(g)
pink2 = pink1 + 2
pink3 = pink1 + c(-0.2, 2)
pink4 = pink1 + c(2.2, 0)

blue = sf::st_sfc(blue1,blue2,blue3)

pink = sf::st_sfc(pink1, pink2, pink3, pink4)

# First plot establishes the combined extent (invisible borders), then the
# two sets are drawn on top in their signature colors.
plot(c(blue,pink), border = NA)
plot(blue, border = "#648fff", add = TRUE)
plot(pink, border = "#dc267f", add = TRUE)

# Promote to sf data frames with id columns; note st_sf(blue, ...) keeps the
# geometry column named `blue` (and likewise `pink`).
blue <- sf::st_sf(blue, data.frame(idblue = c(1, 2, 3)))
pink <- sf::st_sf(pink, data.frame(idpink = c(7, 8, 9, 10)))

# Label each polygon near the mean of its ring vertices, nudged so the blue
# and pink labels don't overlap.
text(sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
     sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
     blue$idblue, col = "#648fff")

text(sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,1]) + 0.4),
     sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,2])),
     pink$idpink, col = "#dc267f")

# Declare attributes spatially constant (avoids sf intersection warnings) and
# assign a projected CRS (EPSG:5070) so areas are meaningful.
sf::st_agr(blue) <- sf::st_agr(pink) <- "constant"
sf::st_crs(pink) <- sf::st_crs(blue) <- sf::st_crs(5070)

## -----------------------------------------------------------------------------

# Fraction of each `blue` polygon's area intersecting each `pink` polygon.
blue_pink_norm_false <- calculate_area_intersection_weights(
  blue, pink, normalize = FALSE
)

blue_pink_norm_false

## -----------------------------------------------------------------------------
# Attach an example value and the projected area to each `blue` polygon.
blue$val <- c(30, 10, 20)
blue$area_blue <- as.numeric(sf::st_area(blue))

# Join the weights onto the attribute table (geometry dropped).
result <- left_join(st_drop_geometry(blue), blue_pink_norm_false,
                    by = "idblue")

result

## -----------------------------------------------------------------------------

# Area-weighted mean of the two overlapping blue values:
# sum(value * w * area) / sum(w * area)
wa_b2 <- 0.375 * 2.56      # blue 2: weight times its area
wa_b3 <- 0.604167 * 3.6864 # blue 3: weight times its area
((10 * wa_b2) + (20 * wa_b3)) / (wa_b2 + wa_b3)


## -----------------------------------------------------------------------------
# With an NA value included, the weighted mean is NA -- missing data
# propagates. NOTE: the numerator must be parenthesized as a whole before
# dividing; the original expression divided only the NA term by the
# denominator (`/` binds tighter than `+`), though the result was NA either
# way.
((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864) + (NA * 1 * 0.81)) / 
  ((1 * 0.81) + (0.375 * 2.56) + (0.604167 * 3.6864))

## -----------------------------------------------------------------------------

# Extensive (mass-preserving) version: just sum value times weight.
sum(c(10, 20) * c(0.375, 0.604167))


## -----------------------------------------------------------------------------
# Aggregate to one row per target (`pink`) polygon. With normalize = FALSE
# the intensive (weighted mean) form needs the source areas; the extensive
# (weighted sum) form does not.
result <- result |>
  group_by(idpink) |>
  summarize(
    new_val_intensive = sum(val * w * area_blue) / sum(w * area_blue),
    new_val_extensive = sum(val * w)
  )

result

## -----------------------------------------------------------------------------

# Same weight computation, but normalized so target areas are folded in.
blue_pink_norm_true <- calculate_area_intersection_weights(
  select(blue, idblue), pink, normalize = TRUE
)

blue_pink_norm_true


## -----------------------------------------------------------------------------
# Join the normalized weights onto the attribute table (geometry dropped).
result <- left_join(st_drop_geometry(blue), blue_pink_norm_true,
                    by = "idblue")

result

## -----------------------------------------------------------------------------
# With normalized weights the source areas are already part of `w`:
w_b2 <- 0.24   # blue 2
w_b3 <- 0.5568 # blue 3
((10 * w_b2) + (20 * w_b3)) / (w_b2 + w_b3)

## -----------------------------------------------------------------------------
# One row per target polygon; with normalized weights no area term is needed
# in the weighted mean.
result <- result |>
  group_by(idpink) |>
  summarize(new_val = sum(val * w) / sum(w))

result

## ----echo=FALSE---------------------------------------------------------------

# Second example: a 2x2 arrangement of smaller blue squares offset inside a
# 2x2 arrangement of larger pink squares.
g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))

base_blue <- st_polygon(g) * 0.75 + c(-.25, -.25)
blue1 <- base_blue
blue2 <- base_blue + 1.5
blue3 <- base_blue + c(0, 1.5)
blue4 <- base_blue + c(1.5, 0)

base_pink <- st_polygon(g)
pink1 <- base_pink
pink2 <- base_pink + 2
pink3 <- base_pink + c(0, 2)
pink4 <- base_pink + c(2, 0)

blue <- st_sfc(blue1, blue2, blue3, blue4)
pink <- st_sfc(pink1, pink2, pink3, pink4)

# Promote to sf data frames; the geometry columns keep the names
# `pink` and `blue`.
pink <- st_sf(pink, data.frame(idpink = c(1, 2, 3, 4)))
blue <- st_sf(blue, data.frame(idblue = c(6, 7, 8, 9)))

plot(st_geometry(pink), border = "#dc267f", lwd = 3)
plot(st_geometry(blue), border = "#648fff", lwd = 3, add = TRUE)

# Label each square near the mean of its ring vertices, nudged for clarity.
blue_xy <- lapply(st_geometry(blue), \(p) colMeans(p[[1]]))
text(sapply(blue_xy, `[`, 1) + 0.4, sapply(blue_xy, `[`, 2) + 0.3,
     blue$idblue, col = "#648fff")

pink_xy <- lapply(st_geometry(pink), \(p) colMeans(p[[1]]))
text(sapply(pink_xy, `[`, 1) - 0.4, sapply(pink_xy, `[`, 2) - 0.5,
     pink$idpink, col = "#dc267f")

# Constant attributes + projected CRS, as in the first example.
st_agr(blue) <- st_agr(pink) <- "constant"
st_crs(pink) <- st_crs(blue) <- st_crs(5070)

## ----echo = FALSE-------------------------------------------------------------
# Attach example values; every blue square has the same 1.5 x 1.5 area.
blue$val <- c(1, 2, 3, 4)
blue$blue_areasqkm <- 1.5 ^ 2

# Choropleth of the blue values with both outlines drawn on top.
plot(blue["val"], reset = FALSE, pal = heat.colors)
plot(st_geometry(pink), border = "#dc267f", lwd = 3, add = TRUE, reset = FALSE)
plot(st_geometry(blue), border = "#648fff", lwd = 3, add = TRUE)

# Label each square near the mean of its ring vertices, nudged for clarity.
blue_xy <- lapply(st_geometry(blue), \(p) colMeans(p[[1]]))
text(sapply(blue_xy, `[`, 1) + 0.4, sapply(blue_xy, `[`, 2) + 0.3,
     blue$idblue, col = "#648fff")

pink_xy <- lapply(st_geometry(pink), \(p) colMeans(p[[1]]))
text(sapply(pink_xy, `[`, 1) - 0.4, sapply(pink_xy, `[`, 2) - 0.5,
     pink$idpink, col = "#dc267f")

## -----------------------------------------------------------------------------
# say we have data from `blue` that we want sampled to `pink`.
# this gives the percent of each `blue` that intersects each `pink`

blue_pink <- calculate_area_intersection_weights(
  select(blue, idblue), select(pink, idpink), normalize = FALSE
)

blue_pink

# NOTE: `w` sums to 1 per `blue` in all cases

blue_pink |>
  group_by(idblue) |>
  summarize(w = sum(w))

# Since normalize is false, we apply weights like:
blue |>
  st_drop_geometry() |>
  left_join(blue_pink, by = "idblue") |>
  group_by(idpink) |> # group so we get one row per `pink`
  # now we calculate the value for each `pink` with fraction of the area of each
  # polygon in `blue` per polygon in `pink` with an equation like this:
  summarize(
    new_val_intensive = sum(val * w * blue_areasqkm) / sum(w * blue_areasqkm),
    new_val_extensive = sum(val * w)
  )

# NOTE: `w` is the fraction of the polygon in `blue`. We need to multiply `w` by the
# unique area of the polygon it is associated with to get the weighted mean weight.

# we can go in reverse if we had data from `pink` that we want sampled to `blue`

pink_blue <- calculate_area_intersection_weights(
  select(pink, idpink), select(blue, idblue), normalize = FALSE
)

pink_blue

# NOTE: `w` sums to 1 per `pink` (source) only where `pink` is fully covered by `blue` (target).

pink_blue |>
  group_by(idpink) |>
  summarize(w = sum(w))

# Now let's look at what happens if we set normalize = TRUE. Here we
# get `blue` as source and `pink` as target but normalize the weights so
# the area of `blue` is built into `w`.

(blue_pink <- calculate_area_intersection_weights(
  select(blue, idblue), select(pink, idpink), normalize = TRUE))

# NOTE: if we summarize by `pink` (target) `w` sums to 1 only where there is full overlap.

summarize(group_by(blue_pink, idpink), w = sum(w))

# Since normalize is TRUE, we apply weights like:
st_drop_geometry(blue) |>
  left_join(blue_pink, by = "idblue") |>
  group_by(idpink) |> # group so we get one row per `pink`
  # now we weight by the percent of each polygon in `pink` per polygon in `blue`
  summarize(new_val = sum( (val * w) ) / sum( w ))

# NOTE: `w` is the fraction of the polygon from `blue` overlapping the polygon from `pink`.
# The area of `blue` is built into the weight so we just sum the weight times value per polygon.
# We can not calculate intensive weighted sums with this weight.


## ----cleanup, echo=FALSE------------------------------------------------------
# Restore the options saved in the setup chunk (scipen reset).
options(org_options)
# NOTE(review): no visible chunk in this file creates "climdiv_prcp.nc" --
# presumably a leftover from a related vignette; confirm before removing.
unlink("climdiv_prcp.nc")
