===================================================================
RCS file: /cvs/ext/extinction_risk_1.R,v
retrieving revision 1.6
retrieving revision 1.20
diff -u -p -r1.6 -r1.20
--- ext/extinction_risk_1.R	2015/07/02 06:51:04	1.6
+++ ext/extinction_risk_1.R	2015/08/15 06:50:20	1.20
@@ -1,5 +1,5 @@
-# extinction_risk_1.R, ver. 1.6 2015/6/24
-# $Id: extinction_risk_1.R,v 1.6 2015/07/02 06:51:04 hako Exp $
+# extinction_risk_1.R
+# $Id: extinction_risk_1.R,v 1.20 2015/08/15 06:50:20 hako Exp $
 #
 # Author: Hiroshi Hakoyama <hako@affrc.go.jp>
 # Copyright (c) 2013-2015 Hiroshi Hakoyama <hako@affrc.go.jp>, All rights reserved.
@@ -27,147 +27,115 @@
 # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # Description:
-# Estimates the demographic parameters and the probability of extinction within 
-# a specific time period, t, from a time series of population size based on the 
-# Wiener-drift process model (Dennis et al., 1991).
+# Estimates the demographic parameters and the probability of 
+# extinction within a specific time period, t, from a time series 
+# of population size based on the Wiener-drift process model.
+# An estimator for confidence interval of extinction risk  
+# developed by Hakoyama (w-z method, in preparation) is implemented, 
+# which is better than the estimator from the ordinary Delta method.
 #
 # Usage:
-# ext1(dat, t = 100, ne = 1, verbose = FALSE)
+# ext1(dat, tt = 100, ne = 1, alpha = 0.05, verbose = FALSE, 
+#      exact.CL = FALSE, n.sim = 10000, alpha.sim = 0.05, qq.plot = FALSE, formatted = TRUE)
 #
 # Arguments:
-# dat		data.frame of 2 variables: Time and Population size
-# t			a time period of interest
-# ne		a lower (extinction) threshold of population size
-# verbose	If set to FALSE, give the ML estimate of the probability of extinction
-# within a specific time period t. If set to TRUE, give a more verbose output:
-# the ML estimate of the growth rate (mu),
-# the ML estimate of variance (s),
-# the unbiased estimate of variance (us),
-# the ML estimate of the probability attaining the threshold ne (psi),
-# the ML estimate of the conditional mean time to reach the threshold, given 
-# the threshold is reached (theta),
-# the ML estimate of the conditional probability of extinction within a specific 
-# time period t, given the threshold is reached (G),
-# the ML estimate of the probability of extinction within a specific time period t (G*psi), and
-# a lower 95 % confidence limit of parameter * (Low.CL.*), and
-# a higher 95 % confidence limit of parameter * (High.CL.*)
-# formatted	If set to TRUE, give the result by formatted output. If set to FALSE, give a list of estimates.
-#
+# dat       data.frame of 2 variables: Time and Population size
+# tt        a time period of interest
+# ne        a lower (extinction) threshold of population size (ne >= 1)
+# alpha     1 - confidence level
+# verbose   If set to FALSE, give the ML estimate of the probability 
+#           of extinction within a specific time period tt. If set to 
+#           TRUE, give a more verbose output:
+#           the ML estimate of the growth rate (mu),
+#           the ML estimate of variance (s),
+#           Current population size, xd = log(N(q) / ne),
+#           Sample size, n = q + 1,
+#           AIC for the distribution of X = log(N),
+#           the ML estimate of the probability of extinction within 
+#           a specific time period t (P),
+#           a lower alpha % confidence limit of parameter * 
+#           (lower.CL.*), and 
+#           an upper alpha % confidence limit of parameter * 
+#           (upper.CL.*).
+# exact.CL  If set to FALSE, give a CI by the w-z method.
+#           If set to TRUE, give an asymptotically exact CI.
+# n.sim     The number of iteration for the asymptotically exact CI.
+# alpha.sim Level of significance to decide the convergence of iteration.
+# qq.plot   If set to TRUE, give a QQ plot for delta.log.n.
+# formatted If set to TRUE, give the result by formatted output. 
+#           If set to FALSE, give a list of estimates.
 # References:
-#  R. Lande and S. H. Orzack. Extinction dynamics of age-structured populations
-# in a fluctuating environment. Proceedings of the National Academy of
-# Sciences, 85(19):7418-7421, 1988.
-# B. Dennis, P. L. Munholland, and J. M. Scott. Estimation of growth and
-# extinction parameters for endangered species. Ecological Monographs, 
-# 61:115-143, 1991.
+#  R. Lande and S. H. Orzack. Extinction dynamics of age-structured
+#  populations in a fluctuating environment. Proceedings of the 
+#  National Academy of Sciences, 85(19):7418-7421, 1988.
+#  B. Dennis, P. L. Munholland, and J. M. Scott. Estimation of 
+#  growth and extinction parameters for endangered species. 
+#  Ecological Monographs, 61:115-143, 1991.
+#  H. Hakoyama (in preparation).  
 #
 
-ext1 <- function(dat, t = 100, ne = 1, alpha = 0.05, verbose = FALSE, formatted = TRUE) {
+ext1 <- function(dat, tt = 100, ne = 1, alpha = 0.05, verbose = FALSE, exact.CL = FALSE, n.sim = 10000, alpha.sim = 0.05, qq.plot = FALSE, formatted = TRUE) {
   yr <- ts(dat[, 1], start = c(dat[, 1][1])) # Year
   ps <- ts(dat[, 2], start = c(dat[, 1][1])) # Population size
   complete <- complete.cases(yr, ps)
   yr <- yr[complete]
   ps <- ps[complete]
   tau <- diff(yr) # time intervals, \tau_i = t_i - t_{i-1}
-  w <- diff(log(ps)) # W_i = \log(N(t_i)/N(t_{i-1})) = X(t_i) - X(t_{i-1})
-  q <- length(yr) - 1 # yr = \{t_0, t_1, \dots, t_q\}
+  delta.log.n <- diff(log(ps))
+  qq <- length(yr) - 1 # yr = \{t_0, t_1, \dots, t_q\}
   tq <- sum(tau)
-  mu <- sum(w) / tq # ML estimate of growth rate
-  s <- (1 / q) * sum((w - mu * tau)^2 / tau) # ML estimate of variance
-  us <- q * s / (q - 1) # an unbiased estimate of variance
+  mu <- sum(delta.log.n) / tq # MLE of growth rate
+  s <- sum((delta.log.n - mu * tau)^2 / tau) / qq # MLE of variance
+  us <- qq * s / (qq - 1) # an unbiased estimate of variance
   xd <- log(ps[length(ps)] / ne)
-  psi <- function(xd, mu, s) {
-  	# The probability that the process will attain the threshold
-    ifelse (mu <= 0, 1, exp(- 2 * mu * xd / s))
+  l <- - log(2 * pi * s) * qq / 2 - sum((delta.log.n - mu)^2) / (2 * s) 
+  AIC.X <- - 2 * l + 4 # AIC for the distribution of X = log(N)
+
+  if (qq.plot == TRUE) {
+    qqnorm(delta.log.n)
+    qqline(delta.log.n)
   }
-  var.for.psi <- function(xd, mu, s) {
-    ifelse (mu <= 0, 0, (4 * xd^2 / s) * (2 *(q -1) * mu^2 / (q^2 * s)))
+
+  lower.CL.mu <- mu - qt(1 - alpha / 2, qq - 1) * sqrt(us / tq)
+  upper.CL.mu <- mu + qt(1 - alpha / 2, qq - 1) * sqrt(us / tq)
+  lower.CL.s <- qq * s / qchisq(1 - alpha / 2, qq - 1)
+  upper.CL.s <- qq * s / qchisq(alpha / 2, qq - 1)
+
+  ww <- W(mu, xd, s, tt)
+  zz <- Z(mu, xd, s, tt)
+  pp <- Pr(ww, zz)
+
+  if (exact.CL == TRUE) {
+    CL.P <- AsymptoticallyExactConfidenceInterval(mu, xd, s, tt, tq, qq, alpha, n.sim, alpha.sim)
+    } else {
+    CL.P <- ConfidenceInterval(mu, xd, s, tt, tq, qq, alpha)
   }
-  Low.CL.psi <- function(xd, mu, s) {
-    ifelse (mu <= 0, 1, exp(- (2 * mu * xd / s) - qnorm(1 - alpha / 2) * sqrt(var.for.psi(xd, mu, s))))
-  }
-  High.CL.psi <- function(xd, mu, s) {
-    ifelse (mu <= 0, 1, exp(- (2 * mu * xd / s) + qnorm(1 - alpha / 2) * sqrt(var.for.psi(xd, mu, s))))
-  }
-  theta <- xd / abs(mu)
-  Low.CL.mu  <- mu - qt(1 - alpha / 2, q - 1) * sqrt(us / tq)
-  High.CL.mu <- mu + qt(1 - alpha / 2, q - 1) * sqrt(us / tq)
-  Low.CL.s  <- q * s / qchisq(1 - alpha / 2, q - 1)
-  High.CL.s <- q * s / qchisq(alpha / 2, q - 1)
-  var.mu <- s / tq
-  var.s  <- 2 * s^2 * (q - 1) / q^2
-  erfc <- function (x) 2 * pnorm(-sqrt(2) * x)
-  dGpsidmu <- - (xd / s) * 
-    exp(- (2 * mu * xd) / s) * 
-    erfc((- mu * t + xd) / sqrt(2 * s * t))
-  dGpsids  <- (exp(- (mu * t + xd)^2 / (2 * s * t)) * t * xd) /
-    (sqrt(2 * pi) * (s * t)^(3/2)) + 
-    ((mu * xd) / s^2) * exp(- (2 * mu * xd) / s) *
-    erfc((- mu * t + xd) / sqrt(2 * s * t))
-  G <- function(t, xd, mu, s) {
-  	# G = Pr[T <= t]: See Eq. (16) and Appendix of Dennis et al. (1991).
-  	# Note that there is a typo in Eq. (16) of Dennis et al. (1991).
-    a <- xd / sqrt(s)
-    b <- abs(mu) / sqrt(s)
-    y <- (b * t - a) / sqrt(t)
-    z <- (b * t + a) / sqrt(t)
-    d0 <-   0.2316419
-    d1 <-   0.319381530
-    d2 <- - 0.356563782
-    d3 <-   1.781477937
-    d4 <- - 1.821255978
-    d5 <-   1.330274429
-    qz <- 1 / (1 + d0 * z)
-    g <- ifelse(z >= 4, pnorm(y) + dnorm(y) *
-      (1 -
-        (1 / z^2) +
-        (1 * 3 / z^4) -
-        (1 * 3 * 5 / z^6) +
-        (1 * 3 * 5 * 7 / z^8) -
-        (1 * 3 * 5 * 7 * 9 / z^10) +
-        (1 * 3 * 5 * 7 * 9 * 11 / z^12) -
-        (1 * 3 * 5 * 7 * 9 * 11 * 13 / z^14)
-      ) / z,
-      pnorm(y) +
-      dnorm(y) * (d1 * qz + d2 * qz^2 + d3 * qz^3 + d4 * qz^4 + d5 * qz^5))
-    gg <- pnorm((-xd + abs(mu) * t) / sqrt(s * t)) + exp(2 * xd * abs(mu) / s) *
-          pnorm((-xd - abs(mu) * t) / sqrt(s * t))
-    ifelse(is.nan(gg), g, gg)
-  }
-  H <- function(t, xd, mu, s) {
-  	log(G(t, xd, mu, s) * psi(xd, mu, s) / (1 - G(t, xd, mu, s) * psi(xd, mu, s)))
-  }
-  Gpsi <- G(t, xd, mu, s) * psi(xd, mu, s)
-  var.H <- var.mu * (dGpsidmu / (Gpsi * (1 - Gpsi)))^2 + 
-           var.s  * (dGpsids  / (Gpsi * (1 - Gpsi)))^2
-  hh <- H(t, xd, mu, s)
-  Low.CL.Gpsi  <- 1 / (1 + exp(- hh + qnorm(1 - alpha / 2) * sqrt(var.H)))
-  High.CL.Gpsi <- 1 / (1 + exp(- hh - qnorm(1 - alpha / 2) * sqrt(var.H)))
+
+  lower.CL.P <- CL.P[[1]]
+  upper.CL.P <- CL.P[[2]]
+
   if (verbose == TRUE) {
-    results <- list(ne = ne, t = t, verbose = verbose,
+    results <- list(ne = ne, tt = tt, verbose = verbose,
+      AIC.X = AIC.X,
+      n = qq + 1,
       xd = xd,
       Growth.rate = mu,
-      Low.CL.mu = Low.CL.mu,
-      High.CL.mu = High.CL.mu,
+      lower.CL.mu = lower.CL.mu,
+      upper.CL.mu = upper.CL.mu,
       Variance = s, 
-      Low.CL.s = Low.CL.s,
-      High.CL.s = High.CL.s,
-      Low.CL.psi = Low.CL.psi(xd, mu, s),
-      High.CL.psi = High.CL.psi(xd, mu, s),
-      Unbiased.variance = us, 
-      Probability.attaining.the.threshold = psi(xd, mu, s), 
-      Conditional.mean.extinction.time = theta, 
-      Conditional.extinction.probability = G(t, xd, mu, s), 
-      Extinction.probability = G(t, xd, mu, s) * psi(xd, mu, s),
-      Low.CL.Gpsi = Low.CL.Gpsi,
-      High.CL.Gpsi = High.CL.Gpsi)
+      lower.CL.s = lower.CL.s,
+      upper.CL.s = upper.CL.s,
+#      Unbiased.variance = us, 
+      Extinction.probability = pp,
+      lower.CL.P = lower.CL.P,
+      upper.CL.P = upper.CL.P)
     if (formatted == TRUE) {
       class(results) <- "ext1"
     }
     return(results)
   } else {
-    results <- list(ne = ne, t = t, verbose = verbose, 
-      Extinction.probability = G(t, xd, mu, s) * psi(xd, mu, s))
+    results <- list(ne = ne, tt = tt, verbose = verbose, 
+      Extinction.probability = pp)
     if (formatted == TRUE) {
       class(results) <- "ext1"
     }
@@ -175,53 +143,136 @@ ext1 <- function(dat, t = 100, ne = 1, alpha = 0.05, v
   }
 }
 
+#-------------------------------------------------------------------
+W <- function(mu, xd, s, tt) (mu * tt + xd) / sqrt(s * tt)
+#-------------------------------------------------------------------
+Z <- function(mu, xd, s, tt) (- mu * tt + xd) / sqrt(s * tt)
+#-------------------------------------------------------------------
+Pr <- function(w, z) {
+  if(z < 35) {
+    pnorm(-w) + exp((z^2 - w^2) / 2) * pnorm(-z)
+  } else {
+    pnorm(-w) + exp(- w^2 / 2) * (sqrt(2) / (2 * sqrt(pi))) * 
+    (1 / z - 1 / z^3 + 3 / z^5 - 15 / z^7 
+    + 105 / z^9 - 945 / z^11 + 10395 / z^13)
+  }
+}
+#-------------------------------------------------------------------
+FindCL <- function(tq, qq, tt, est, a, width = 10) {
+  t.obs <- sqrt((qq - 1) * tq / (qq * tt)) * est
+  df <- qq - 1
+  const <- sqrt(tq / tt)
+  f <- function(x) pt(t.obs, df, const * x) - a
+  d.est <- est / width + 1
+  uniroot(f, c(- d.est + est, d.est + est), extendInt = "yes")$root
+}
+#-------------------------------------------------------------------
+ConfidenceInterval <- function(mu, xd, s, tt, tq, qq, alpha) {
+  den1 <- sqrt(s * tt)
+  w.est <- (mu * tt + xd) / den1
+  z.est <- (- mu * tt + xd) / den1
+  lower.CL.w <- FindCL(tq, qq, tt, w.est, 1 - alpha / 2)
+  upper.CL.w <- FindCL(tq, qq, tt, w.est, alpha / 2)
+  lower.CL.z <- FindCL(tq, qq, tt, z.est, 1 - alpha / 2)
+  upper.CL.z <- FindCL(tq, qq, tt, z.est, alpha / 2)
+  lower.CL.P <- Pr(upper.CL.w, lower.CL.z)
+  upper.CL.P <- Pr(lower.CL.w, upper.CL.z)
+  c(lower.CL.P, upper.CL.P)
+}
+#-------------------------------------------------------------------
+randomCI <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, ci.function = ConfidenceInterval) {
+  mu <- rnorm(1, mean = mu.obs, sd = sqrt(s.obs / qq))
+  s <-  rchisq(1, df = qq - 1) * s.obs / qq
+  ci.function(mu, xd, s, tt, tq, qq, alpha)
+}
+#-------------------------------------------------------------------
+RejectionRate <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, gam, n.sim, ci.function = ConfidenceInterval) {
+  CI.dist <- replicate(n.sim, randomCI(mu.obs, xd, s.obs, tt, tq, qq, alpha, ci.function))
+  P.obs <- Pr(W(mu.obs, xd, s.obs, tt), Z(mu.obs, xd, s.obs, tt))
+  ci <- ci.function(mu.obs, xd, s.obs, tt, tq, qq, alpha)
+  complete <- complete.cases(CI.dist[1, ], CI.dist[2, ])
+  lower <- CI.dist[1, ][complete]
+  upper <- CI.dist[2, ][complete]
+  n.estimables <- length(lower)
+  n.rejects <- (sum(P.obs < lower) + sum(P.obs > upper))
+  if (n.estimables > 0) {
+  binom <- binom.test(n.rejects, n.estimables, p = gam)
+  } else {
+  binom <- list(estimate = NaN, p.value = NaN)
+  }
+  c(binom$estimate[[1]], binom$p.value, ci)
+}
+#-------------------------------------------------------------------
+AsymptoticallyExactConfidenceInterval <- function(mu.obs, xd, s.obs, tt, tq, qq, alpha, n.sim, alpha.sim, ci.function = ConfidenceInterval) {
+  P.obs <- Pr(W(mu.obs, xd, s.obs, tt), Z(mu.obs, xd, s.obs, tt))
+  if (P.obs == 0 || P.obs == 1) {
+    return(c(ci.function(mu.obs, xd, s.obs, tt, tq, qq, alpha), NaN, NaN))
+  }
+  res <- RejectionRate(mu.obs, xd, s.obs, tt, tq, qq, alpha, alpha, n.sim, ci.function)
+  if (is.nan(res[[1]])) return(c(NaN, NaN, alpha, 0))
+  gg <- res[[1]]
+  pp <- res[[2]]
+  ai <- alpha
+  i <- 0
+  while (pp < alpha.sim) {
+    i <- i + 1
+    res <- RejectionRate(mu.obs, xd, s.obs, tt, tq, qq, ai, alpha, n.sim, ci.function)
+    if (is.nan(res[[1]])) return(c(NaN, NaN, ai, i))
+    gg <- res[[1]]
+    pp <- res[[2]]
+    ai <- ai * alpha / gg
+    if (ai < 10^(-10) || ai > 1) return(c(NaN, NaN, ai, i))
+    if (i > 20) return(c(NaN, NaN, ai, i))
+  }
+  c(res[[3]], res[[4]], ai, i)
+}
+#-------------------------------------------------------------------
 print.ext1 <- function(obj, digits = 5) {
-#  cat("\nExtinction risk\n")
   if (obj$verbose == TRUE) {
     output.est <- data.frame(
     c(formatC(obj$Growth.rate, digits = digits), 
       formatC(obj$Variance, digits = digits), 
-      formatC(obj$Unbiased.variance, digits = digits),
-      formatC(obj$Probability.attaining.the.threshold, digits = digits),
-#      formatC(obj$Conditional.mean.extinction.time, digits = digits),
-#      formatC(obj$Conditional.extinction.probability, digits = digits),
+      formatC(obj$xd, digits = digits),
+      formatC(obj$n, digits = digits),
+      formatC(obj$AIC.X, digits = digits),
       formatC(obj$Extinction.probability, digits = digits)), 
-    c(paste("(",formatC(obj$Low.CL.mu, digits = digits),", ", 
-      formatC(obj$High.CL.mu, digits = digits),")", sep = ""), 
-      paste("(",formatC(obj$Low.CL.s, digits = digits),", ", 
-      formatC(obj$High.CL.s, digits = digits),")", sep = ""), 
-      paste("(",formatC(obj$Low.CL.s, digits = digits),", ", 
-      formatC(obj$High.CL.s, digits = digits),")", sep = ""),
-      paste("(",formatC(obj$Low.CL.psi, digits = digits),", ", 
-      formatC(obj$High.CL.psi, digits = digits),")", sep = ""),
-#      "-",
-      paste("(",formatC(obj$Low.CL.Gpsi, digits = digits),", ", 
-      formatC(obj$High.CL.Gpsi, digits = digits),")", sep = "")))
+    c(paste("(",formatC(obj$lower.CL.mu, digits = digits),", ", 
+      formatC(obj$upper.CL.mu, digits = digits),")", sep = ""), 
+      paste("(",formatC(obj$lower.CL.s, digits = digits),", ", 
+      formatC(obj$upper.CL.s, digits = digits),")", sep = ""), 
+      "-",
+      "-",
+      "-",
+      paste("(",formatC(obj$lower.CL.P, digits = digits),", ", 
+      formatC(obj$upper.CL.P, digits = digits),")", sep = "")))
     dimnames(output.est) <- list(
       c("Growth rate:",
         "Variance:",
-        "Unbiased variance:",
-        "Probability attaining the threshold:",
-#        "Conditional mean extinction time:",
-#        "Conditional extinction probability:",
-        paste("Probability of decline to", obj$ne, "within", obj$t, "years:")),
+        "Current population size, x_d = log(N(q) / n_e):",
+        "Sample size, n = q + 1:",
+        "AIC for the distribution of X = log(N):",
+        paste("Probability of decline to", obj$ne, "within", obj$tt, "years:")),
       c("Estimate","95% confidence interval"))
     print(output.est)
   } else {
     output.est <- data.frame(
       c(formatC(obj$Extinction.probability, digits = digits)))
     dimnames(output.est) <- list(
-      c(paste("Probability of decline to", obj$ne, "within", obj$t, "years:")),
+      c(paste("Probability of decline to", obj$ne, "within", obj$tt, "years:")),
       c("Estimate"))
-  	print(output.est)
+    print(output.est)
   }
 }
 #
 # Examples
 # Yellowstone grizzly bears (from Dennis et al., 1991)
- # dat <- data.frame(Year = c(1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987), 
-# Population = c(44, 47, 46, 44, 46, 45, 46, 40, 39, 39, 42, 44, 41, 40, 33, 36, 34, 39, 35, 34, 38, 36, 37, 41, 39, 51, 47, 57, 47))
+  # dat <- data.frame(Year = c(1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987), 
+ # Population = c(44, 47, 46, 44, 46, 45, 46, 40, 39, 39, 42, 44, 41, 40, 33, 36, 34, 39, 35, 34, 38, 36, 37, 41, 39, 51, 47, 57, 47))
 # The probability of extinction (of decline to population size 1) within 100 years
-# ext1(dat, 100)
+# ext1(dat, t = 100)
 # The probability of decline to 10 individuals within 100 years
-# ext1(dat, 100, 10, verbose = TRUE)
+# ext1(dat, t = 100, ne = 10, verbose = TRUE)
+# with QQ-plot
+# ext1(dat, t = 100, ne = 10, verbose = TRUE, qq.plot = T)
+# The probability of decline to 1 individuals within 100 years with an asymptotically exact confidence interval
+# ext1(dat, t = 100, ne = 1, verbose = TRUE, exact.CL = TRUE)
\ No newline at end of file