--- title: "ASH-based wavelet shrinkage for heteroskedastic models" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{smashr demo} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = TRUE,comment = "#",fig.align = "center", fig.cap = " ",dpi = 120,results = "hold") ``` In this vignette, we assume a simple parametric model of the form $$ y_i = \mu_i + \epsilon_i, $$ where $\epsilon_i \sim N(0,\sigma_i^2)$, and both $\mu$ and $\sigma$ are unknowns. We use the "smash" procedure (SMoothing via Adaptive SHrinkage) to estimate both the mean and the variances. Here we present a brief demonstration of the method. We first look at mean estimation, which is our primary focus. A sample mean function is presented, as well as a couple of different variance functions. Our method is compared against a few other simple methods. ## Set up environment We begin by loading the MASS, smashr, EbayesThresh and wavethresh packages. ```{r load-pkgs, message=FALSE} library(MASS) library(smashr) library(EbayesThresh) library(wavethresh) ``` ## Preparations First, we define the mean function used to simulate the data. ```{r define-spike-function} spike.f <- function(x) (0.75 * exp(-500 * (x - 0.23)^2) + 1.5 * exp(-2000 * (x - 0.33)^2) + 3 * exp(-8000 * (x - 0.47)^2) + 2.25 * exp(-16000 * (x - 0.69)^2) + 0.5 * exp(-32000 * (x - 0.83)^2)) n <- 1024 t <- 1:n/n mu.s <- spike.f(t) ``` Define a few other functions which will be used in the code chunks below. ```{r define-functions} mse <- function(x, y) mean((x - y)^2) l2norm <- function(x) sum(x^2) mise <- function(x, y, r) 10000 * mean(apply(x - rep(1, r) %o% y, 1, l2norm)/l2norm(y)) sig.est.func <- function(x, n) sqrt(2/(3 * (n - 2)) * sum((1/2 * x[1:(n - 2)] - x[2:(n - 1)] + 1/2 * x[3:n])^2)) ``` Define a function for the default wavelet thresholding method. ```{r define-wavelet-default} waveti.u <- function(x, filter.number = 10, family = "DaubLeAsymm", min.level = 3, noise.level) { TT = length(x) thresh = noise.level * sqrt(2 * log(TT)) x.w = wavethresh::wd(x, filter.number, family, type = "station") x.w.t = threshold(x.w, levels = (min.level):(x.w$nlevels - 1), policy = "manual", value = thresh, type = "hard") x.w.t.r = AvBasis(convert(x.w.t)) return(x.w.t.r) } ``` Define another function for the default EbayesThresh method. ```{r define-ebayesthresh-default} waveti.ebayes <- function(x, filter.number = 10, family = "DaubLeAsymm", min.level = 3, noise.level) { n = length(x) J = log2(n) x.w = wd(x, filter.number, family, type = "station") for (j in min.level:(J - 1)) { x.pm = ebayesthresh(accessD(x.w, j), sdev = noise.level) x.w = putD(x.w, j, x.pm) } mu.est = AvBasis(convert(x.w)) return(mu.est) } ``` For the first demonstration, define the mean and variance functions, and set the signal to noise ratio. ```{r mean-and-variance-functions} mu.t <- (1 + mu.s)/5 rsnr <- sqrt(1) var1 <- rep(1, n) var2 <- (1e-04 + 4 * (exp(-550 * (t - 0.2)^2) + exp(-200 * (t - 0.5)^2) + exp(-950 * (t - 0.8)^2)))/1.35 ``` ## Constant variance example We first look at the case of constant variance. ```{r constant-variance-example} set.seed(327) sigma.ini <- sqrt(var1) sigma.t <- sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 X.s <- matrix(rnorm(10 * n, mu.t, sigma.t), nrow = 10, byrow = TRUE) mu.est <- apply(X.s, 1, smash.gaus) mu.est.tivar.ash <- apply(X.s, 1, ti.thresh, method = "smash") mu.est.tivar.mad <- apply(X.s, 1, ti.thresh, method = "rmad") mu.est.ti <- matrix(0, 10, n) mu.est.ti.ebayes <- matrix(0, 10, n) for (i in 1:10) { sig.est = sig.est.func(X.s[i, ], n) mu.est.ti[i, ] = waveti.u(X.s[i, ], noise.level = sig.est) mu.est.ti.ebayes[i, ] = waveti.ebayes(X.s[i, ], noise.level = sig.est) } ``` Assess the accuracy of the results. ```{r calc-mise} cat("SMASH:",mise(t(mu.est), mu.t, 10),"\n") cat("TI thresholding with variance estimated from smash:", mise(t(mu.est.tivar.ash), mu.t, 10),"\n") cat("TI thresholding with variance estimated from running MAD:", mise(t(mu.est.tivar.mad), mu.t, 10),"\n") cat("TI thresholding with constant variance (estimated):", mise(mu.est.ti, mu.t, 10),"\n") cat("EBayes with constant variance (estimated):", mise(mu.est.ti.ebayes, mu.t, 10),"\n") ``` Plot the estimated mean functions against the ground-truth function (in black). ```{r plots-1, fig.width=5, fig.height=4} plot(mu.t, xlab = "", ylab = "", type = "l") lines(mu.est[, 1], col = 2) lines(mu.est.tivar.mad[, 1], col = 3) lines(mu.est.ti[1, ], col = 4) lines(mu.est.ti.ebayes[1, ], col = 6) legend("topright", legend = c("smash", "ti_rmad", "ti_homo", "ebayes_homo"), fill = c(2, 3, 4, 6)) ``` ## Non-constant variance example Generate the data for this example. ```{r non-constant-variance-example} sigma.ini = sqrt(var2) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 set.seed(327) X.s = matrix(rnorm(10 * n, mu.t, sigma.t), nrow = 10, byrow = TRUE) mu.est = apply(X.s, 1, smash.gaus) mu.est.tivar.ash = apply(X.s, 1, ti.thresh, method = "smash") mu.est.tivar.mad = apply(X.s, 1, ti.thresh, method = "rmad") mu.est.ti = matrix(0, 10, n) mu.est.ti.ebayes = matrix(0, 10, n) for (i in 1:10) { sig.est = sig.est.func(X.s[i, ], n) mu.est.ti[i, ] = waveti.u(X.s[i, ], noise.level = sig.est) mu.est.ti.ebayes[i, ] = waveti.ebayes(X.s[i, ], noise.level = sig.est) } ``` Assess the accuracy of the results. ```{r calc-mise-2} cat("SMASH:",mise(t(mu.est), mu.t, 10),"\n") cat("TI thresholding with variance estimated from smash:", mise(t(mu.est.tivar.ash), mu.t, 10),"\n") cat("TI thresholding with variance estimated from running MAD:", mise(t(mu.est.tivar.mad), mu.t, 10),"\n") cat("TI thresholding with constant variance (estimated):", mise(mu.est.ti, mu.t, 10),"\n") cat("EBayes with constant variance (estimated):", mise(mu.est.ti.ebayes, mu.t, 10) ,"\n") ``` Plot the estimated mean functions against the ground-truth function (in black). ```{r plots-2, fig.width=5, fig.height=4} plot(mu.t, xlab = "", ylab = "", type = "l") lines(mu.est[, 1], col = 2) lines(mu.est.tivar.mad[, 1], col = 3) lines(mu.est.ti[1, ], col = 4) lines(mu.est.ti.ebayes[1, ], col = 6) legend("topright", legend = c("smash", "ti_rmad", "ti_homo", "ebayes_homo"), fill = c(2, 3, 4, 6)) ``` ## Estimating the variance function Next we look at variance estimation. Because variance estimation is much harder, we use a larger sample size. In this example, we set $n = 4096$. ```{r estimated-variance-example} n = 2^12 t = 1:n/n mu.s = spike.f(t) ``` Define the test functions and generate the data. ```{r est-var-functions} pos = c(0.1, 0.13, 0.15, 0.23, 0.25, 0.4, 0.44, 0.65, 0.76, 0.78, 0.81) hgt = c(4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2) wth = c(0.005, 0.005, 0.006, 0.01, 0.01, 0.03, 0.01, 0.01, 0.005, 0.008, 0.005) mu.b = rep(0, n) for (j in 1:length(pos)) { mu.b = mu.b + hgt[j]/((1 + (abs(t - pos[j])/wth[j]))^4) } dop.f = function(x) sqrt(x * (1 - x)) * sin((2 * pi * 1.05)/(x + 0.05)) mu.dop = dop.f(t) var1.ini = ((3 - 20 * t) * (t >= 0 & t < 0.1) + (20 * t - 1) * (t >= 0.1 & t < 0.25) + (4 + (1 - 4 * t) * 18/19) * (t >= 0.25 & t < 0.725) + (2.2 + 10 * (t - 0.725)) * (t >= 0.725 & t < 0.89) + (3.85 - 85 * (t - 0.89)/11) * (t >= 0.89 & t <= 1)) var1 = var1.ini/sqrt(var(var1.ini)) var2.ini = (1 + 4 * (exp(-550 * (t - 0.2)^2) + exp(-200 * (t - 0.5)^2) + exp(-950 * (t - 0.8)^2))) var2 = var2.ini/sqrt(var(var2.ini)) var3 = mu.b/sqrt(var(mu.b)) var4 = (mu.dop + 2)/sqrt(var(mu.dop)) sigma.t.1 = sqrt(var1) sigma.t.2 = sqrt(var2) sigma.t.3 = sqrt(var3) sigma.t.4 = sqrt(var4) set.seed(327) X.s.1 = rnorm(n, 0, sigma.t.1) set.seed(327) X.s.2 = rnorm(n, 0, sigma.t.2) set.seed(327) X.s.3 = rnorm(n, 0, sigma.t.3) set.seed(327) X.s.4 = rnorm(n, 0, sigma.t.4) ``` Fit the SMASH model to these data. ```{r est-var-smash} var.est.1 = smash.gaus(X.s.1, v.est = TRUE) var.est.2 = smash.gaus(X.s.2, v.est = TRUE) var.est.3 = smash.gaus(X.s.3, v.est = TRUE) var.est.4 = smash.gaus(X.s.4, v.est = TRUE) ``` Evaluate the fit of these models. ```{r est-var-mse} mse(var.est.1, sigma.t.1^2) mse(var.est.2, sigma.t.2^2) mse(var.est.3, sigma.t.3^2) mse(var.est.4, sigma.t.4^2) ``` These plots show the ground-truth variance (black) against the variance functions estimated by SMASH (red). ```{r plots-3, fig.width=5.5, fig.height=5} par(mfrow = c(2, 2)) plot(sigma.t.1^2, xlab = "", ylab = "", type = "l", main = "V1") lines(var.est.1, col = 2) plot(sigma.t.2^2, xlab = "", ylab = "", type = "l", main = "V2") lines(var.est.2, col = 2) plot(sigma.t.3^2, xlab = "", ylab = "", type = "l", main = "V3") lines(var.est.3, col = 2) plot(sigma.t.4^2, xlab = "", ylab = "", type = "l", main = "V4") lines(var.est.4, col = 2) ``` ## Unevenly spaced data In this section, we examine how this wavelet-based approach handles unevenly spaced data. Wavelet methods are not optimized for unevenly spaced data, but still works. We first look at the case when the data are normally spaced. ```{r unevenly-spaced} n = 1024 t = sort(rnorm(1024, 0, 1)) t = (t - min(t))/(max(t) - min(t)) mu.s = spike.f(t) ``` Define mean function, signal-to-noise ratio, and the variance functions. ```{r unevenly-spaced-functions} mu.t = (1 + mu.s)/5 rsnr = sqrt(1) var1 = rep(1, n) var2 = (1e-04 + 4 * (exp(-550 * (t - 0.2)^2) + exp(-200 * (t - 0.5)^2) + exp(-950 * (t - 0.8)^2)))/1.35 ``` ### Unevenly spaced data with constant variance First, we look at the case of constant variance. ```{r unevenly-spaced-const-var} sigma.ini = sqrt(var1) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 set.seed(327) X.s = matrix(rnorm(10 * n, mu.t, sigma.t), nrow = 10, byrow = TRUE) ``` Apply the methods on the data. ```{r unevenly-spaced-fit-models} mu.est = apply(X.s, 1, smash.gaus) mu.est.tivar.ash = apply(X.s, 1, ti.thresh, method = "smash") mu.est.tivar.mad = apply(X.s, 1, ti.thresh, method = "rmad") mu.est.ti = matrix(0, 10, n) mu.est.ti.ebayes = matrix(0, 10, n) for (i in 1:10) { sig.est = sig.est.func(X.s[i, ], n) mu.est.ti[i, ] = waveti.u(X.s[i, ], noise.level = sig.est) mu.est.ti.ebayes[i, ] = waveti.ebayes(X.s[i, ], noise.level = sig.est) } ``` Assess the accuracy of the estimates. ```{r calc-mise-3} cat("SMASH:",mise(t(mu.est), mu.t, 10),"\n") cat("TI thresholding with variance estimated from smash:", mise(t(mu.est.tivar.ash), mu.t, 10),"\n") cat("TI thresholding with variance estimated from running MAD:", mise(t(mu.est.tivar.mad), mu.t, 10),"\n") cat("TI thresholding with constant variance (estimated):", mise(mu.est.ti, mu.t, 10),"\n") cat("EBayes with constant variance (estimated):", mise(mu.est.ti.ebayes, mu.t, 10),"\n") ``` Assess the accuracy over a grid. ```{r calc-mise-grid} xgrid = 1:n/n mu.est.inter = matrix(0, 10, n) mu.est.tivar.ash.inter = matrix(0, 10, n) mu.est.tivar.mad.inter = matrix(0, 10, n) mu.est.ti.inter = matrix(0, 10, n) mu.est.ti.ebayes.inter = matrix(0, 10, n) for (i in 1:10) { mu.est.inter[i, ] = approx(t, mu.est[, i], xgrid)$y mu.est.tivar.ash.inter[i, ] = approx(t, mu.est.tivar.ash[, i], xgrid)$y mu.est.tivar.mad.inter[i, ] = approx(t, mu.est.tivar.mad[, i], xgrid)$y mu.est.ti.inter[i, ] = approx(t, mu.est.ti[i, ], xgrid)$y mu.est.ti.ebayes.inter[i, ] = approx(t, mu.est.ti.ebayes[i, ], xgrid)$y } mu.t.inter = (spike.f(xgrid) + 1)/5 mise(mu.est.inter, mu.t.inter, 10) mise(mu.est.tivar.ash.inter, mu.t.inter, 10) mise(mu.est.tivar.mad.inter, mu.t.inter, 10) mise(mu.est.ti.inter, mu.t.inter, 10) mise(mu.est.ti.ebayes.inter, mu.t.inter, 10) ``` Plot the estimated mean functions against the ground-truth function (in black). ```{r plots-4, fig.width=5, fig.height=4} par(mfrow = c(1, 1)) plot(t, mu.t, type = "l") lines(t, mu.est[, 1], col = 2) lines(t, mu.est.tivar.mad[, 1], col = 3) lines(t, mu.est.ti[1, ], col = 4) lines(t, mu.est.ti.ebayes[1, ], col = 6) legend("topright", legend = c("smash", "ti_rmad", "ti_homo", "ebayes_homo"), fill = c(2, 3, 4, 6)) ``` ### Unevenly spaced data with non-constant variance We then look at the case of non-constant variance: ```{r unevenly-spaced-nonconstant-var} sigma.ini = sqrt(var2) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 set.seed(327) X.s = matrix(rnorm(10 * n, mu.t, sigma.t), nrow = 10, byrow = TRUE) ``` ```{r unevenly-spaced-nonconstant-var-fit-models} mu.est = apply(X.s, 1, smash.gaus) mu.est.tivar.ash = apply(X.s, 1, ti.thresh, method = "smash") mu.est.tivar.mad = apply(X.s, 1, ti.thresh, method = "rmad") mu.est.ti = matrix(0, 10, n) mu.est.ti.ebayes = matrix(0, 10, n) for (i in 1:10) { sig.est = sig.est.func(X.s[i, ], n) mu.est.ti[i, ] = waveti.u(X.s[i, ], noise.level = sig.est) mu.est.ti.ebayes[i, ] = waveti.ebayes(X.s[i, ], noise.level = sig.est) } ``` Assess the accuracy of the estimates. ```{r calc-mise-4} cat("SMASH:",mise(t(mu.est), mu.t, 10),"\n") cat("TI thresholding with variance estimated from smash:", mise(t(mu.est.tivar.ash), mu.t, 10),"\n") cat("TI thresholding with variance estimated from running MAD:", mise(t(mu.est.tivar.mad), mu.t, 10),"\n") cat("TI thresholding with constant variance (estimated):", mise(mu.est.ti, mu.t, 10),"\n") cat("EBayes with constant variance (estimated):", mise(mu.est.ti.ebayes, mu.t, 10),"\n") ``` Assess the accuracy over a grid. ```{r calc-mise-grid-2} xgrid = 1:n/n mu.est.inter = matrix(0, 10, n) mu.est.tivar.ash.inter = matrix(0, 10, n) mu.est.tivar.mad.inter = matrix(0, 10, n) mu.est.ti.inter = matrix(0, 10, n) mu.est.ti.ebayes.inter = matrix(0, 10, n) for (i in 1:10) { mu.est.inter[i, ] = approx(t, mu.est[, i], xgrid)$y mu.est.tivar.ash.inter[i, ] = approx(t, mu.est.tivar.ash[, i], xgrid)$y mu.est.tivar.mad.inter[i, ] = approx(t, mu.est.tivar.mad[, i], xgrid)$y mu.est.ti.inter[i, ] = approx(t, mu.est.ti[i, ], xgrid)$y mu.est.ti.ebayes.inter[i, ] = approx(t, mu.est.ti.ebayes[i, ], xgrid)$y } mu.t.inter = (spike.f(xgrid) + 1)/5 mise(mu.est.inter, mu.t.inter, 10) mise(mu.est.tivar.ash.inter, mu.t.inter, 10) mise(mu.est.tivar.mad.inter, mu.t.inter, 10) mise(mu.est.ti.inter, mu.t.inter, 10) mise(mu.est.ti.ebayes.inter, mu.t.inter, 10) ``` Plot the estimated mean functions against the ground-truth function (in black). ```{r plots-5, fig.width=5, fig.height=4} par(mfrow = c(1, 1)) plot(t, mu.t, xlab = "", ylab = "", type = "l") lines(t, mu.est[, 1], col = 2) lines(t, mu.est.tivar.mad[, 1], col = 3) lines(t, mu.est.ti[1, ], col = 4) lines(t, mu.est.ti.ebayes[1, ], col = 6) legend("topright", legend = c("smash", "ti_rmad", "ti_homo", "ebayes_homo"), fill = c(2, 3, 4, 6)) ``` ## Poisson-distributed data example Now we consider the case in which the data are distributed according to a Poisson process. ```{r poisson-example} n <- 1024 t <- c(0, rexp(n - 1, 1)) t <- cumsum(t) t <- (t - min(t))/(max(t) - min(t)) mu.s <- spike.f(t) ``` Next, define the mean and variance functions. ```{r poisson-example-mean-and-variance} mu.t <- (1 + mu.s)/5 rsnr <- sqrt(1) var1 <- rep(1, n) var2 <- (1e-04 + 4 * (exp(-550 * (t - 0.2)^2) + exp(-200 * (t - 0.5)^2) + exp(-950 * (t - 0.8)^2)))/1.35 ``` ### Constant variance We first look at the case of constant variance. ```{r poisson-constant-variance} sigma.ini = sqrt(var1) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 set.seed(327) X.s = matrix(rnorm(10 * n, mu.t, sigma.t), nrow = 10, byrow = TRUE) mu.est = apply(X.s, 1, smash.gaus) mu.est.tivar.ash = apply(X.s, 1, ti.thresh, method = "smash") mu.est.tivar.mad = apply(X.s, 1, ti.thresh, method = "rmad") mu.est.ti = matrix(0, 10, n) mu.est.ti.ebayes = matrix(0, 10, n) for (i in 1:10) { sig.est = sig.est.func(X.s[i, ], n) mu.est.ti[i, ] = waveti.u(X.s[i, ], noise.level = sig.est) mu.est.ti.ebayes[i, ] = waveti.ebayes(X.s[i, ], noise.level = sig.est) } ``` Assess accuracy of the results. ```{r calc-mise-poisson-1} cat("SMASH:",mise(t(mu.est), mu.t, 10),"\n") cat("TI thresholding with variance estimated from smash:", mise(t(mu.est.tivar.ash), mu.t, 10),"\n") cat("TI thresholding with variance estimated from running MAD:", mise(t(mu.est.tivar.mad), mu.t, 10),"\n") cat("TI thresholding with constant variance (estimated):", mise(mu.est.ti, mu.t, 10),"\n") cat("EBayes with constant variance (estimated):", mise(mu.est.ti.ebayes, mu.t, 10),"\n") ``` Assess accuracy over a grid. ```{r calc-mise-grid-poisson-1} xgrid = 1:n/n mu.est.inter = matrix(0, 10, n) mu.est.tivar.ash.inter = matrix(0, 10, n) mu.est.tivar.mad.inter = matrix(0, 10, n) mu.est.ti.inter = matrix(0, 10, n) mu.est.ti.ebayes.inter = matrix(0, 10, n) for (i in 1:10) { mu.est.inter[i, ] = approx(t, mu.est[, i], xgrid)$y mu.est.tivar.ash.inter[i, ] = approx(t, mu.est.tivar.ash[, i], xgrid)$y mu.est.tivar.mad.inter[i, ] = approx(t, mu.est.tivar.mad[, i], xgrid)$y mu.est.ti.inter[i, ] = approx(t, mu.est.ti[i, ], xgrid)$y mu.est.ti.ebayes.inter[i, ] = approx(t, mu.est.ti.ebayes[i, ], xgrid)$y } mu.t.inter = (spike.f(xgrid) + 1)/5 mise(mu.est.inter, mu.t.inter, 10) mise(mu.est.tivar.ash.inter, mu.t.inter, 10) mise(mu.est.tivar.mad.inter, mu.t.inter, 10) mise(mu.est.ti.inter, mu.t.inter, 10) mise(mu.est.ti.ebayes.inter, mu.t.inter, 10) ``` Plot the estimated mean functions against the ground-truth function (in black). ```{r plots-6, fig.width=5, fig.height=4} par(mfrow = c(1, 1)) plot(t, mu.t, xlab = "", ylab = "", type = "l") lines(t, mu.est[, 1], col = 2) lines(t, mu.est.tivar.mad[, 1], col = 3) lines(t, mu.est.ti[1, ], col = 4) lines(t, mu.est.ti.ebayes[1, ], col = 6) legend("topright",legend = c("smash", "ti_rmad", "ti_homo", "ebayes_homo"), fill = c(2, 3, 4, 6)) ``` ### Non-constant variance We then look at the case of non-constant variance. ```{r poisson-nonconstant-var} sigma.ini = sqrt(var2) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 set.seed(327) X.s = matrix(rnorm(10 * n, mu.t, sigma.t), nrow = 10, byrow = TRUE) mu.est = apply(X.s, 1, smash.gaus) mu.est.tivar.ash = apply(X.s, 1, ti.thresh, method = "smash") mu.est.tivar.mad = apply(X.s, 1, ti.thresh, method = "rmad") mu.est.ti = matrix(0, 10, n) mu.est.ti.ebayes = matrix(0, 10, n) for (i in 1:10) { sig.est = sig.est.func(X.s[i, ], n) mu.est.ti[i, ] = waveti.u(X.s[i, ], noise.level = sig.est) mu.est.ti.ebayes[i, ] = waveti.ebayes(X.s[i, ], noise.level = sig.est) } ``` Assess accuracy of the results. ```{r calc-mise-poisson-2} cat("SMASH:",mise(t(mu.est), mu.t, 10),"\n") cat("TI thresholding with variance estimated from smash:", mise(t(mu.est.tivar.ash), mu.t, 10),"\n") cat("TI thresholding with variance estimated from running MAD:", mise(t(mu.est.tivar.mad), mu.t, 10),"\n") cat("TI thresholding with constant variance (estimated):", mise(mu.est.ti, mu.t, 10),"\n") cat("EBayes with constant variance (estimated):", mise(mu.est.ti.ebayes, mu.t, 10),"\n") ``` Assess accuracy over a grid. ```{r calc-mise-grid-poisson-2} xgrid = 1:n/n mu.est.inter = matrix(0, 10, n) mu.est.tivar.ash.inter = matrix(0, 10, n) mu.est.tivar.mad.inter = matrix(0, 10, n) mu.est.ti.inter = matrix(0, 10, n) mu.est.ti.ebayes.inter = matrix(0, 10, n) for (i in 1:10) { mu.est.inter[i, ] = approx(t, mu.est[, i], xgrid)$y mu.est.tivar.ash.inter[i, ] = approx(t, mu.est.tivar.ash[, i], xgrid)$y mu.est.tivar.mad.inter[i, ] = approx(t, mu.est.tivar.mad[, i], xgrid)$y mu.est.ti.inter[i, ] = approx(t, mu.est.ti[i, ], xgrid)$y mu.est.ti.ebayes.inter[i, ] = approx(t, mu.est.ti.ebayes[i, ], xgrid)$y } mu.t.inter = (spike.f(xgrid) + 1)/5 mise(mu.est.inter, mu.t.inter, 10) mise(mu.est.tivar.ash.inter, mu.t.inter, 10) mise(mu.est.tivar.mad.inter, mu.t.inter, 10) mise(mu.est.ti.inter, mu.t.inter, 10) mise(mu.est.ti.ebayes.inter, mu.t.inter, 10) ``` ```{r plots-7, fig.width=5, fig.height=4} par(mfrow = c(1, 1)) plot(t, mu.t, xlab = "", ylab = "", type = "l") lines(t, mu.est[, 1], col = 2) lines(t, mu.est.tivar.mad[, 1], col = 3) lines(t, mu.est.ti[1, ], col = 4) lines(t, mu.est.ti.ebayes[1, ], col = 6) legend("topright", legend = c("smash", "ti_rmad", "ti_homo", "ebayes_homo"), fill = c(2, 3, 4, 6)) ``` ## Simulation example from Fan & Yao (1998) Now we look at some of the simulations in a couple of papers, as well as the datasets used in them. The first one is from [Fan & Yao (1998)](https://doi.org/10.1093/biomet/85.3.645). The simulation is as described in Example 2. To deal with the fact that the sample size is not a sample size of 2, we first reflect the right portion of the data about the right endpoint so that it is a power of 2. To make the data periodic, we then reflect the new data about the right endpoint again. Our results are shown below. ```{r fan-yao} mu.mad = 0 var.mad = 0 cat("Running 400 simulations.\n") for (i in 1:400) { cat(i,"") x = sort(runif(200, -2, 2)) err = rnorm(200) sigma.t = 0.4 * exp(-2 * x^2) + 0.2 mu.t = x + 2 * exp(-16 * x^2) y = mu.t + sigma.t * err xgrid = seq(-1.8, 1.8, length.out = 101) mu.t.inter = xgrid + 2 * exp(-16 * xgrid^2) var.t.inter = (0.4 * exp(-2 * xgrid^2) + 0.2)^2 y.exp = c(y, y[200:145]) y.data = c(y.exp, y.exp[256:1]) mu.est = smash.gaus(y.data) mu.est = mu.est[1:200] var.est = smash.gaus(y.data, v.est = TRUE) var.est = var.est[1:200] mu.est.inter = approx(x, mu.est, xgrid, "linear")$y var.est.inter = approx(x, var.est, xgrid, "linear")$y mu.mad[i] = 1/101 * sum(abs(mu.est.inter - mu.t.inter)) var.mad[i] = 1/101 * sum(abs(var.est.inter - var.t.inter)) } cat("\n") ``` Show the model fitting results. ```{r fig.width=5, fig.height=4} boxplot(var.mad) plot(x, mu.t, xlab = "", ylab = "", type = "l", main = "mean function") lines(x, mu.est[1:200], col = 2) plot(x, sigma.t^2, xlab = "", ylab = "", type = "l", main = "var function") lines(x, var.est[1:200], col = 2) ``` ## Treasury Bill example from Fan & Yao (1998) We also apply SMASH to the dataset in Example 1 of the Fan & Yao (1998) paper. To take into account replicates, we take the median of the response at the same point. The results are shown below. ```{r load-treas-data} data(treas) y = ar(treas,FALSE,5)$res y = y[!is.na(y)] x = sort(treas[5:(length(treas) - 1)]) y = y[order(treas[5:(length(treas) - 1)])] ``` Plot the Treasury bill data. ```{r plot-treas-data, fig.width=5, fig.height=4} plot(x, y) ``` ## Average replicate data ```{r average-treas-data} x.mod = unique(x) y.mod = 0 for (i in 1:length(x.mod)) { y.mod[i] = median(y[x == x.mod[i]]) } y.exp = c(y.mod, y.mod[length(y.mod):(2 * length(y.mod) - 2^10 + 1)]) y.final = c(y.exp, y.exp[length(y.exp):1]) y.est = smash.gaus(y.final) y.est.var = smash.gaus(y.final, v.est = TRUE) ``` More plots of the data. ```{r plot-treas-results, fig.width=5.5, fig.height=5} par(mfrow = c(2, 2)) plot(treas, xlab = "year", ylab = "interest rate", type = "l") plot(x, y) lines(x.mod, y.est[1:(length(y.mod))], xlab = "X", ylab = "Y", col = 2) plot(x.mod, y.est[1:(length(y.mod))], xlab = "X", ylab = "Y", type = "l", ylim = c(-0.3, 0.3)) plot(x.mod, sqrt(y.est.var[1:(length(y.mod))]), xlab = "X", ylab = "volatility", type = "l") ``` ## Heavisine example from Delouille *et al* (2004) We now examine the example from Delouille et al. (2004). Specifically, we compare with the heavisine function used in section 6, as the actual function is readily available. The results for $n=200$ for both the homoskedastic and the heteroskedastic cases are given. ```{r heavisine} n = 200 mse.hsm.c = 0 mse.hsm.n = 0 for (i in 1:500) { xt = sort(rnorm(n, 0.5, 0.2)) xt = (xt - min(xt))/(max(xt) - min(xt)) mu.hsm = 4 * sin(4 * pi * xt) - 2 * sign(xt - 0.3) - 2 * sign(0.72 - xt) mu.t = mu.hsm var1 = rep(1, n) var2 = (xt <= 0.5) + 2 * (xt > 0.5) sigma.ini = sqrt(var1) rsnr = sqrt(4) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 y = rnorm(n, mu.t, sigma.t) y.exp = c(y, y[200:145]) y.final = c(y.exp, y.exp[256:1]) mu.est <- smash.gaus(y.final) mu.est = mu.est[1:n] mse.hsm.c[i] = mse(mu.est, mu.t) sigma.ini = sqrt(var2) rsnr = sqrt(4) sigma.t = sigma.ini/mean(sigma.ini) * sd(mu.t)/rsnr^2 y = rnorm(n, mu.t, sigma.t) y.exp = c(y, y[200:145]) y.final = c(y.exp, y.exp[256:1]) mu.est <- smash.gaus(y.final) mu.est = mu.est[1:n] mse.hsm.n[i] = mse(mu.est, mu.t) } sqrt(quantile(mse.hsm.c, seq(0.25, 0.75, 0.25))) sqrt(quantile(mse.hsm.n, seq(0.25, 0.75, 0.25))) ``` ## Motorcycle example from Delouille *et al* (2004) We also look at the motorcycle in the same paper. We plot the data points and fit, as well as the estimated variances. ```{r, eval=FALSE} data(mcycle) x.ini = sort(mcycle$times) y.ini = mcycle$accel[order(mcycle$times)] x = unique(x.ini) y = 0 for (i in 1:length(x)) { y[i] = median(y.ini[x.ini == x[i]]) } y.exp = c(y, y[length(y):(2 * length(y) - 128 + 1)]) y.final = c(y.exp, y.exp[length(y.exp):1]) y.est = smash.gaus(y.final) y.est = y.est[1:length(y)] y.var.est = smash.gaus(y.final, v.est = TRUE) y.var.est = y.var.est[1:length(y)] ``` ## Session info This is the version of R and the packages that were used to generate the results shown above. ```{r session-info} sessionInfo() ```