# extinction_risk_1.R
# $Id: extinction_risk_1.R,v 1.24 2017/12/23 00:18:12 hako Exp $
#
# Author: Hiroshi Hakoyama <hako@affrc.go.jp>
# Copyright (c) 2013-2017 Hiroshi Hakoyama <hako@affrc.go.jp>, All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY 
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Description:
# Estimates the demographic parameters and the probability of 
# extinction within a specific time period, t, from a time series 
# of population size based on the Wiener-drift process model.
# An estimator for confidence interval of extinction risk  
# developed by Hakoyama (w-z method, in preparation) is implemented, 
# which is better than the estimator by the ordinary Delta method.
#
# Usage:
# ext1(dat, tt = 100, ne = 1, alpha = 0.05, verbose = FALSE, 
#      better.CL = FALSE, n.sim = 10000, alpha.sim = 0.05, qq.plot = FALSE, formatted = TRUE)
#
# Arguments:
# dat       data.frame of 2 variables: Time and Population size
# tt        a time period of interest
# ne        a lower (extinction) threshold of population size (ne >= 1)
# alpha     1 - confidence level
# verbose   If set to FALSE, give the ML estimate of the probability 
#           of extinction within a specific time period tt. If set to 
#           TRUE, give a more verbose output:
#           the ML estimate of the growth rate (mu),
#           the ML estimate of variance (s),
#           Current population size, xd = log(N(q) / ne),
#           Sample size, n = q + 1,
#           AIC for the distribution of N,
#           the ML estimate of the probability of extinction within 
#           a specific time period t (P),
#           a lower (1 - alpha) % confidence limit of parameter * 
#           (lower.CL.*), and 
#           an upper (1 - alpha) % confidence limit of parameter * 
#           (upper.CL.*).
# better.CL  If set to FALSE, give a CI by the w-z method.
#           If set to TRUE, give a better CI.
# n.sim     The number of iteration for the better CI method.
# alpha.sim Level of significance to decide the convergence of iteration.
# qq.plot   If set to TRUE, give a QQ plot for log(n_i/n_{i-1})/sqrt(tau_i) ~ N(0, 1).
# formatted If set to TRUE, give the result by formatted output. 
#           If set to FALSE, give a list of estimates.
# References:
#  R. Lande and S. H. Orzack. Extinction dynamics of age-structured
#  populations in a fluctuating environment. Proceedings of the 
#  National Academy of Sciences, 85(19):7418-7421, 1988.
#  B. Dennis, P. L. Munholland, and J. M. Scott. Estimation of 
#  growth and extinction parameters for endangered species. 
#  Ecological Monographs, 61:115-143, 1991.
#  H. Hakoyama (in preparation).
#

ext1 <- function(dat, tt = 100, ne = 1, alpha = 0.05, verbose = FALSE, better.CL = FALSE, n.sim = 10000, alpha.sim = 0.05, qq.plot = FALSE, formatted = TRUE) {
  yr <- dat[, 1] # Year
  n  <- dat[, 2] # Population size
  complete <- complete.cases(yr, n)
  yr <- yr[complete]
  n  <-  n[complete]
  ti <- yr - yr[1] # elapsed time, ti = \{t_0 = 0, t_1, \dots, t_q\}
  tau <- diff(ti) # time intervals, \tau_i = t_i - t_{i-1}
  delta.log.n <- diff(log(n))
  qq <- length(yr) - 1
  tq <- ti[length(ti)]
  nq <- n[length(n)]
  mu <- log(nq / n[1]) / tq # MLE of growth rate
  s <- sum((delta.log.n - mu * tau)^2 / tau) / qq # MLE of variance
  us <- qq * s / (qq - 1) # an unbiased estimate of variance
  xd <- log(nq / ne)
  lnl <- - sum(log(n[-1] * sqrt(2 * tau * pi))) - (qq / 2) * log(s) - sum((1 / tau) * (delta.log.n - mu * tau)^2) / (2 * s)
  AIC <- - 2 * lnl + 2 * 2 # AIC for the distribution of N (population size)

# \log(n_i/n_{i-1})/\sqrt{\tau_i} \sim N(0, 1)
  if (qq.plot == TRUE) {
    qqnorm(delta.log.n / tau)
    qqline(delta.log.n / tau)
  }

  lower.CL.mu <- mu - qt(1 - alpha / 2, qq - 1) * sqrt(us / tq)
  upper.CL.mu <- mu + qt(1 - alpha / 2, qq - 1) * sqrt(us / tq)
  lower.CL.s <- qq * s / qchisq(1 - alpha / 2, qq - 1)
  upper.CL.s <- qq * s / qchisq(alpha / 2, qq - 1)

  ww <- W(mu, xd, s, tt)
  zz <- Z(mu, xd, s, tt)
  pp <- Pr(ww, zz)

  if (better.CL == TRUE) {
    CL.P <- BetterConfidenceInterval(mu, xd, s, tt, tq, qq, alpha, n.sim, alpha.sim)
    } else {
    CL.P <- ConfidenceInterval(mu, xd, s, tt, tq, qq, alpha)
  }

  lower.CL.P <- CL.P[[1]]
  upper.CL.P <- CL.P[[2]]

  if (verbose == TRUE) {
    results <- list(ne = ne, tt = tt, verbose = verbose,
      alpha = alpha,
      AIC = AIC,
      sample.size = qq + 1,
      nq = nq,
      xd = xd,
      Growth.rate = mu,
      lower.CL.mu = lower.CL.mu,
      upper.CL.mu = upper.CL.mu,
      Variance = s, 
      lower.CL.s = lower.CL.s,
      upper.CL.s = upper.CL.s,
#      Unbiased.variance = us, 
      Extinction.probability = pp,
      lower.CL.P = lower.CL.P,
      upper.CL.P = upper.CL.P)
    if (formatted == TRUE) {
      class(results) <- "ext1"
    }
    return(results)
  } else {
    results <- list(ne = ne, tt = tt, verbose = verbose, 
      Extinction.probability = pp)
    if (formatted == TRUE) {
      class(results) <- "ext1"
    }
    return(results)
  }
}

#-------------------------------------------------------------------
W <- function(mu, xd, s, tt) (mu * tt + xd) / sqrt(s * tt)
#-------------------------------------------------------------------
Z <- function(mu, xd, s, tt) (- mu * tt + xd) / sqrt(s * tt)
#-------------------------------------------------------------------
Pr <- function(w, z) {
  if(z < 35) {
    pnorm(-w) + exp((z^2 - w^2) / 2) * pnorm(-z)
  } else {
    pnorm(-w) + exp(- w^2 / 2) * (sqrt(2) / (2 * sqrt(pi))) * 
    (1 / z - 1 / z^3 + 3 / z^5 - 15 / z^7 
    + 105 / z^9 - 945 / z^11 + 10395 / z^13)
  }
}
#-------------------------------------------------------------------
ConfidenceInterval <- function(mu, xd, s, tt, tq, qq, alpha) {
  den.1 <- sqrt(s * tt)
  w.est <- (mu * tt + xd) / den.1
  z.est <- (- mu * tt + xd) / den.1
  df <- qq - 1
  const.1 <- sqrt((qq - 1) * tq / (qq * tt))
  const.2 <- sqrt(tq / tt)
  FindCL <- function(tq, qq, tt, est, a, width = 10) {
    t.obs <- const.1 * est
    f <- function(x) pt(t.obs, df, const.2 * x) - a
    d.est <- est / width + 1
    uniroot(f, c(- d.est + est, d.est + est), extendInt = "yes")$root
  }
  lower.CL.w <- FindCL(tq, qq, tt, w.est, 1 - alpha / 2)
  upper.CL.w <- FindCL(tq, qq, tt, w.est, alpha / 2)
  lower.CL.z <- FindCL(tq, qq, tt, z.est, 1 - alpha / 2)
  upper.CL.z <- FindCL(tq, qq, tt, z.est, alpha / 2)
  lower.CL.P <- Pr(upper.CL.w, lower.CL.z)
  upper.CL.P <- Pr(lower.CL.w, upper.CL.z)
  c(lower.CL.P, upper.CL.P)
}
#-------------------------------------------------------------------
randomCI <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, ci.function = ConfidenceInterval) {
  mu <- rnorm(1, mean = mu.obs, sd = sqrt(s.obs / qq))
  s <-  rchisq(1, df = qq - 1) * s.obs / qq
  ci.function(mu, xd, s, tt, tq, qq, alpha)
}
#-------------------------------------------------------------------
RejectionRate.lower <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, gam, n.sim, ci.function = ConfidenceInterval) {
  P.obs <- Pr(W(mu.obs, xd, s.obs, tt), Z(mu.obs, xd, s.obs, tt))
  CI.dist <- replicate(n.sim, randomCI(mu.obs, xd, s.obs, tt, tq, qq, alpha, ci.function))
  complete <- complete.cases(CI.dist[1, ])
  CL <- CI.dist[1, ][complete]
  n.estimables <- length(CL)
  n.rejects <- sum(P.obs < CL)
  if (n.estimables > 0) {
  binom <- binom.test(n.rejects, n.estimables, p = gam / 2)
  } else {
  binom <- list(estimate = NaN, p.value = NaN)
  }
  c(binom$estimate[[1]], binom$p.value)
}
#-------------------------------------------------------------------
RejectionRate.upper <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, gam, n.sim, ci.function = ConfidenceInterval) {
  P.obs <- Pr(W(mu.obs, xd, s.obs, tt), Z(mu.obs, xd, s.obs, tt))
  CI.dist <- replicate(n.sim, randomCI(mu.obs, xd, s.obs, tt, tq, qq, alpha, ci.function))
  complete <- complete.cases(CI.dist[2, ])
  CL <- CI.dist[2, ][complete]
  n.estimables <- length(CL)
  n.rejects <- sum(P.obs > CL)
  if (n.estimables > 0) {
  binom <- binom.test(n.rejects, n.estimables, p = gam / 2)
  } else {
  binom <- list(estimate = NaN, p.value = NaN)
  }
  c(binom$estimate[[1]], binom$p.value)
}
#-------------------------------------------------------------------
BetterConfidenceInterval <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, n.sim, alpha.sim, ci.function = ConfidenceInterval, verbose = FALSE, min.iteration = 5) {
  P.obs <- Pr(W(mu.obs, xd, s.obs, tt), Z(mu.obs, xd, s.obs, tt))
  if (P.obs == 0 || P.obs == 1) {
    return(ci.function(mu.obs, xd, s.obs, tt, tq, qq, alpha))
  }

  rr.lower <- RejectionRate.lower(mu.obs, xd, s.obs, tt, tq, qq, alpha, alpha, n.sim, ci.function)
  rr.upper <- RejectionRate.upper(mu.obs, xd, s.obs, tt, tq, qq, alpha, alpha, n.sim, ci.function)
  if (rr.lower[[2]] > alpha.sim && rr.upper[[2]] > alpha.sim) {
#  	cat("No need of using the better CI method.", "\n")
  	return(ci.function(mu.obs, xd, s.obs, tt, tq, qq, alpha)) 
  }

  p.value.lower <- 0
  gamma.lower <- alpha
  i <- 0
   while (i < min.iteration || p.value.lower < alpha.sim) {
  	i <- i + 1
    rr.lower <- RejectionRate.lower(mu.obs, xd, s.obs, tt, tq, qq, gamma.lower, alpha, n.sim, ci.function)
    f.lower <- rr.lower[[1]]
    p.value.lower <- rr.lower[[2]]
    if (verbose == TRUE) cat("gamma =", gamma.lower, "f_hat(gamma) =", f.lower, "p-value =", p.value.lower, "\n")
    gamma.lower <- gamma.lower - (f.lower - alpha / 2) / i
  }

  p.value.upper <- 0
  gamma.upper <- alpha
  i <- 0
   while (i < min.iteration || p.value.upper < alpha.sim) {
  	i <- i + 1
    rr.upper <- RejectionRate.upper(mu.obs, xd, s.obs, tt, tq, qq, gamma.upper, alpha, n.sim, ci.function)
    f.upper <- rr.upper[[1]]
    p.value.upper <- rr.upper[[2]]
    if (verbose == TRUE) cat("gamma =", gamma.upper, "f_hat(gamma) =", f.upper, "p-value =", p.value.upper, "\n")
    gamma.upper <- gamma.upper - (f.upper - alpha / 2) / i
  }

c(ci.function(mu.obs, xd, s.obs, tt, tq, qq, gamma.lower)[[1]], ci.function(mu.obs, xd, s.obs, tt, tq, qq, gamma.upper)[[2]])
}
#-------------------------------------------------------------------
print.ext1 <- function(obj, digits = 5) {
  if (obj$verbose == TRUE) {
    output.est <- data.frame(
    c(formatC(obj$Growth.rate, digits = digits), 
      formatC(obj$Variance, digits = digits), 
      formatC(obj$nq, digits = digits), 
      formatC(obj$xd, digits = digits),
      formatC(obj$sample.size, digits = digits),
      formatC(obj$AIC, digits = digits),
      formatC(obj$Extinction.probability, digits = digits)), 
    c(paste("(",formatC(obj$lower.CL.mu, digits = digits),", ", 
      formatC(obj$upper.CL.mu, digits = digits),")", sep = ""), 
      paste("(",formatC(obj$lower.CL.s, digits = digits),", ", 
      formatC(obj$upper.CL.s, digits = digits),")", sep = ""), 
      "-",
      "-",
      "-",
      "-",
      paste("(",formatC(obj$lower.CL.P, digits = digits),", ", 
      formatC(obj$upper.CL.P, digits = digits),")", sep = "")))
    dimnames(output.est) <- list(
      c("Growth rate:",
        "Variance:",
        "Current population size, nq:",
        "xd = ln(nq / ne):",
        "Sample size, q + 1:",
        "AIC for the distribution of N:",
        paste("Probability of decline to", obj$ne, "within", obj$tt, "years:")),
      c("Estimate", paste(as.character(100 * (1 - obj$alpha)), "% CI")))
    print(output.est)
  } else {
    output.est <- data.frame(
      c(formatC(obj$Extinction.probability, digits = digits)))
    dimnames(output.est) <- list(
      c(paste("Probability of decline to", obj$ne, "within", obj$tt, "years:")),
      c("Estimate"))
    print(output.est)
  }
}
#
# Examples
# Yellowstone grizzly bears (from Dennis et al., 1991)
  dat <- data.frame(Year = c(1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987), 
 Population = c(44, 47, 46, 44, 46, 45, 46, 40, 39, 39, 42, 44, 41, 40, 33, 36, 34, 39, 35, 34, 38, 36, 37, 41, 39, 51, 47, 57, 47))
# The probability of extinction (of decline to population size 1) within 100 years
# ext1(dat, t = 100)
# The probability of decline to 10 individuals within 100 years
# ext1(dat, t = 100, ne = 10, verbose = TRUE)
# with QQ-plot
# ext1(dat, t = 100, ne = 10, verbose = TRUE, qq.plot = T)
# The probability of decline to 1 individuals within 100 years with a better confidence interval
# ext1(dat, t = 100, ne = 1, verbose = TRUE, better.CL = TRUE)