This is an automatically generated document accompanying the paper “Degrees of Freedom for Nonlinear Least Squares Estimation” by Niels Richard Hansen and Alexander Sokol.
library(smde)
## Loading required package: expm
## Loading required package: Matrix
## Loading required package: lattice
##
## Attaching package: 'expm'
##
## The following object is masked from 'package:Matrix':
##
## expm
##
## Loading required package: Rcpp
library(parallel)
library(expm)
library(ggplot2)
library(gridExtra)
## Loading required package: grid
library(reshape2)
load("config.RData")
B <- config$B
round(expm(B), 2) ## The matrix exponential of the rate matrix B
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 0.98 -0.01 -0.02 -0.02 -0.02 -0.03 -0.03 -0.03 -0.03 0.00 0.00
## [2,] 0.10 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [3,] 0.17 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [4,] -0.08 0.00 0.20 1.00 0.00 0.00 0.00 0.00 0.00 0.02 0.00
## [5,] 0.24 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00
## [6,] 0.26 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00
## [7,] 0.19 0.00 -0.05 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00
## [8,] 0.29 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00
## [9,] 0.31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00
## [10,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.94 -0.03
## [11,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.31 0.99
## [12,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.32 -0.01
## [13,] -0.03 0.00 -0.40 0.00 0.00 0.00 0.00 0.00 0.00 0.28 0.00
## [14,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.34 -0.01
## [15,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.35 -0.01
## [16,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.36 -0.01
## [17,] 0.02 0.00 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.40 -0.01
## [18,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.37 -0.01
## [19,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.38 -0.01
## [20,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.38 -0.01
## [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
## [1,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [2,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [3,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [4,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.10 0.00
## [5,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [6,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [7,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [8,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [9,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [10,] -0.03 -0.03 -0.03 -0.03 -0.04 -0.04 -0.04 -0.04 -0.04
## [11,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [12,] 0.99 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [13,] 0.00 0.99 -0.31 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [14,] -0.01 -0.01 0.99 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [15,] -0.01 -0.01 -0.01 0.99 -0.01 -0.01 -0.01 -0.01 -0.01
## [16,] -0.01 -0.01 -0.01 -0.01 0.99 -0.01 -0.01 -0.01 -0.01
## [17,] -0.01 -0.01 0.19 -0.01 -0.01 0.99 -0.01 -0.01 -0.01
## [18,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 0.99 -0.01 -0.01
## [19,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 0.99 -0.01
## [20,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 0.99
To run these computations locally, download and load the binary R configuration file (config.RData, loaded above); a human-readable version of the configuration is also available.
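A minimal sketch of how this could be done from R (the URL is hypothetical; substitute the location where the file is actually hosted):

## Hypothetical URL for the configuration file accompanying this document
url <- "http://example.org/smde/config.RData"
download.file(url, destfile = "config.RData", mode = "wb")
load("config.RData")        ## creates the list 'config' used throughout
str(config, max.level = 1)  ## inspect the configuration entries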
round(expm(config$t * B), 2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 0.98 -0.01 -0.02 -0.02 -0.02 -0.03 -0.03 -0.03 -0.03 0.00 0.00
## [2,] 0.10 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [3,] 0.17 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [4,] -0.08 0.00 0.20 1.00 0.00 0.00 0.00 0.00 0.00 0.02 0.00
## [5,] 0.24 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00
## [6,] 0.26 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00
## [7,] 0.19 0.00 -0.05 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00
## [8,] 0.29 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00
## [9,] 0.31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00
## [10,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.94 -0.03
## [11,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.31 0.99
## [12,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.32 -0.01
## [13,] -0.03 0.00 -0.40 0.00 0.00 0.00 0.00 0.00 0.00 0.28 0.00
## [14,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.34 -0.01
## [15,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.35 -0.01
## [16,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.36 -0.01
## [17,] 0.02 0.00 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.40 -0.01
## [18,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.37 -0.01
## [19,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.38 -0.01
## [20,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.38 -0.01
## [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
## [1,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [2,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [3,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [4,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.10 0.00
## [5,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [6,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [7,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [8,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [9,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [10,] -0.03 -0.03 -0.03 -0.03 -0.04 -0.04 -0.04 -0.04 -0.04
## [11,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [12,] 0.99 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [13,] 0.00 0.99 -0.31 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [14,] -0.01 -0.01 0.99 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01
## [15,] -0.01 -0.01 -0.01 0.99 -0.01 -0.01 -0.01 -0.01 -0.01
## [16,] -0.01 -0.01 -0.01 -0.01 0.99 -0.01 -0.01 -0.01 -0.01
## [17,] -0.01 -0.01 0.19 -0.01 -0.01 0.99 -0.01 -0.01 -0.01
## [18,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 0.99 -0.01 -0.01
## [19,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 0.99 -0.01
## [20,] -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 0.99
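Before simulating data, we examine numerically how the rate matrix recovered through the matrix logarithm responds to perturbations of \(e^{tB}\): selected entries of the matrix exponential are perturbed over a grid, and the \(\ell_1\)-norm of \(\mathrm{logm}(\cdot)/t\) is recorded for the contour plots below.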
EB <- expm(config$t * B)
xpm <- ypm <- seq(-0.25, 0.5, 0.01)
L1norm <- vector("list", 2)
L1norm[[1]] <- L1norm[[2]] <- matrix(0, length(xpm), length(ypm))
for (i in seq_along(xpm)) {
  for (j in seq_along(ypm)) {
    tmp <- EB
    tmp[1, 1] <- EB[1, 1] + xpm[i]
    tmp[2, 2] <- EB[2, 2] + ypm[j]
    L1norm[[1]][i, j] <- sum(abs(logm(tmp) / config$t))
    tmp <- EB
    tmp[2, 1] <- EB[2, 1] + xpm[i]
    tmp[1, 2] <- EB[1, 2] + ypm[j]
    L1norm[[2]][i, j] <- sum(abs(logm(tmp) / config$t))
  }
}
temp <- data.frame(x = rep(EB[1, 1] + xpm, times = length(ypm)),
                   y = rep(EB[2, 2] + ypm, each = length(xpm)),
                   z = as.vector(L1norm[[1]]))
temp2 <- data.frame(x = EB[1, 1], y = EB[2, 2])
p1 <- ggplot(aes(x = x, y = y, z = z), data = temp) +
  stat_contour(size = 1, bins = 20) +
  geom_point(data = temp2, aes(z = NULL), size = 4, color = "red") +
  xlab("a") + ylab("d")
temp <- data.frame(x = rep(EB[2, 1] + xpm, times = length(ypm)),
                   y = rep(EB[1, 2] + ypm, each = length(xpm)),
                   z = as.vector(L1norm[[2]]))
temp2 <- data.frame(x = EB[2, 1], y = EB[1, 2])
p2 <- ggplot(aes(x = x, y = y, z = z), data = temp) +
  stat_contour(size = 1, bins = 20) +
  geom_point(data = temp2, aes(z = NULL), size = 4, color = "red") +
  xlab("b") + ylab("c")
grid.arrange(p1, p2, ncol = 2)
## plot(eigen(B)$values)
### Simulation of data
set.seed(config$seed)
t <- seq(0, 5 * config$t, 0.01)
x0 <- seq(-5, 5, length.out = config$d)
y <- matrix(0, nrow = config$d, ncol = length(t))
for (i in seq_along(t)) {
  y[, i] <- expm(t[i] * B) %*% x0
}
p1 <- qplot(t, value, data = cbind(t, melt(t(y))),
            geom = "line", color = factor(Var2)) +
  scale_color_discrete(guide = "none") +
  geom_vline(xintercept = config$t) +
  ylab(expression(Y[i]))
y <- y + matrix(rnorm(config$d * length(t), 0, config$sigma), config$d, length(t))
p2 <- qplot(t, value, data = cbind(t, melt(t(y))),
            geom = "line", color = factor(Var2)) +
  scale_color_discrete(guide = "none") +
  geom_vline(xintercept = config$t) +
  ylab(expression(Y[i]))
grid.arrange(p1, p2, ncol = 2)
set.seed(config$seed)
x <- y <- vector("list", config$N)
for (i in 1:config$N) {
  x[[i]] <- matrix(rnorm(config$n, 0, config$designSigma), config$d, config$m)
  y[[i]] <- expm(config$t * config$B) %*% x[[i]] +
    matrix(rnorm(config$n, 0, config$sigma), config$d, config$m)
}
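In summary, each of the N simulated datasets follows the model implemented above, with independent Gaussian design and noise:

\[
Y = e^{t B} X + E, \qquad X_{ij} \sim \mathcal{N}(0, \sigma_{\mathrm{design}}^2), \quad E_{ij} \sim \mathcal{N}(0, \sigma^2),
\]

where the columns of \(X\) are the \(m\) design vectors in \(\mathbb{R}^d\), and \(\sigma_{\mathrm{design}}\) denotes config$designSigma.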
The following computations use mclapply for parallel computation on multiple cores. If you run the code from within a GUI, override the use of multiple cores by setting config$nrCores <- 1L.
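A minimal sketch of such an override, using detectCores from the parallel package (the interactive() guard is one possible convention, not part of the original configuration):

## Force serial execution in interactive/GUI sessions, where forking is unsafe
if (interactive()) config$nrCores <- 1L
## Otherwise cap the number of cores at what the machine reports
nc <- detectCores()
if (!is.na(nc)) config$nrCores <- min(config$nrCores, nc)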
pb <- progressBar(config$N, file = config$pb[1], width = 80L,
                  pid = TRUE, totalBar = FALSE)
## pb <- progressBar(config$N, file = "", width = 80L) ## For console usage
smdeModels <- mclapply(1:config$N, function(i) {
  ## Model setup
  model <- multModel(y = y[[i]], x = x[[i]],
                     t = config$t,
                     weights = config$weights,
                     sigma = config$sigma)
  ## We record the time usage for the estimation for later analysis
  time <- system.time(
    model <- fit(model,
                 method = config$method,
                 nlambda = config$nlambda,
                 lambda.min.ratio = config$lambda.min.ratio)
  )
  model$time <- time
  model$dfMethod <- 'con'
  pb$up()
  model
}, mc.cores = config$nrCores)
pb$kill()
The object smdeModels contains a list of models, each holding a sequence of penalty parameters \(\lambda\) and a corresponding sequence of parameter estimates. Before any further computations are carried out, the models are screened for errors that occurred during the estimation.
status <- numeric(config$N)
for (i in 1:config$N) {
  model <- smdeModels[[i]]
  if (!is.null(model$status))
    status[i] <- model$status
}
nrCond <- sum(status > 0)
if (nrCond > 0) {
  ## For models with status = 3, an unknown error has occurred
  ## during the coordinate descent.
  if (any(status == 3)) {
    smdeModels <- smdeModels[status < 3]
    config$N <- length(smdeModels)
  }
  if (file.exists("messages.txt"))
    file.remove("messages.txt")
  for (i in which(status > 0))
    cat("i = ", i, " ", smdeModels[[i]]$msg, "\n",
        file = "messages.txt", append = TRUE)
}
There were 0 notable conditions or errors. If this number is not 0, check the messages written to messages.txt.
## Settings for computing threshold estimator
mint <- min(abs(config$B[abs(config$B) > 0])) / 3
maxt <- mean(abs(config$B[abs(config$B) > 0]))
thres <- seq(mint, maxt, length.out = 25)
## Initialization
riskTrue <- riskTilde <- oneNorm <- matrix(0, config$N, config$nlambda)
riskHatMLE <- riskMLE <- oneNormMLE <- numeric(config$N)
riskThres <- oneNormThres <- matrix(0, config$N, length(thres) + 1)
We compute the risk estimates for each model. The computation of SURE using Theorem 4 is the most demanding and is implemented to take advantage of multiple cores.
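For orientation, SURE for the fitted values takes the familiar form (stated here generically, up to the normalization and weights used in the paper; see Theorem 4 for the precise divergence formula):

\[
\widehat{\mathrm{Risk}} = \| Y - \hat{Y} \|_2^2 - n \sigma^2 + 2 \sigma^2 \widehat{\mathrm{df}},
\]

where \(\widehat{\mathrm{df}}\) is the divergence of the fitted values with respect to the observations.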
## The SURE estimator for l1-constrained estimation based on the
## divergence formula in Theorem 4
pb <- progressBar(config$N, file = config$pb[2],
                  width = 80L, pid = TRUE, totalBar = FALSE)
riskHat <- mclapply(1:config$N, function(i) {
  pb$up()
  model <- smdeModels[[i]]
  apply(model$Blambda, 2, function(B) riskHat(model, B))
}, mc.cores = config$nrCores)
pb$kill()
## Note: 'riskHat' now names the N x nlambda matrix of SURE values; calls of
## the form riskHat(...) below still resolve to the smde function, since R
## skips non-function objects when looking up a name used as a function.
riskHat <- t(simplify2array(riskHat))
pb <- progressBar(config$N, file = config$pb[3],
                  width = 80L, pid = TRUE, totalBar = FALSE)
for (i in seq_along(smdeModels)) {
  model <- smdeModels[[i]]
  Bhat <- model$Blambda
  ## Actual observed loss with knowledge of the true parameter
  riskTrue[i, ] <- apply(Bhat, 2, risk, model = model, par0 = config$B)
  ## The SURE estimator based on the locally 'flat' divergence approximation
  riskTilde[i, ] <- apply(Bhat, 2, function(B)
    riskHat(model, B, df = dfEff(model, B, method = 'nonzero') - 1))
  ## The weighted l1-norms
  oneNorm[i, ] <- apply(Bhat, 2, penalty, lambda = model$w)
  ## Computation of thresholded estimators, their actual observed loss
  ## and their weighted l1-norm. If the MLE does not exist, a surrogate
  ## estimate is chosen as the least penalized estimate.
  if (model$status == 2) { ## The MLE does not exist
    Bhat <- Bhat[, ncol(Bhat)]
  } else {
    Bhat <- model$B ## The MLE
  }
  ## The risk and weighted l1-norms for the MLE
  riskMLE[i] <- risk(model, par0 = config$B, par = Bhat)
  riskHatMLE[i] <- riskHat(model, par = Bhat,
                           df = dfEff(model, Bhat, method = 'nonzero'))
  oneNormMLE[i] <- penalty(Bhat, model$w)
  Bhat <- cbind(as.vector(Bhat),
                sapply(thres, function(t) { Bhat[abs(Bhat) < t] <- 0; Bhat }))
  riskThres[i, ] <- apply(Bhat, 2, risk, model = model, par0 = config$B)
  oneNormThres[i, ] <- apply(Bhat, 2, penalty, lambda = model$w)
  pb$up()
}
riskThresMean <- colMeans(riskThres)
oneNormThres <- colMeans(oneNormThres)
In the computations above, the risk estimates are obtained for each simulation at a set of \(\ell_1\)-norm constraints that varies across simulations. We next compute the risk estimates at a fixed set of \(\ell_1\)-norm constraints using linear interpolation.
## Function that does linear interpolation; note that it reads oneNorm and
## oneNormMLE from the enclosing environment.
inter <- function(riskest, fixeds, upper) {
  fixeds <- sort(fixeds)
  result <- matrix(0, nrow = nrow(riskest), ncol = length(fixeds))
  for (i in 1:nrow(riskest)) {
    s <- oneNorm[i, ]
    r <- riskest[i, ]
    s <- c(s, oneNormMLE[i])
    r <- c(r, upper[i])
    result[i, ] <- approx(s, r, xout = fixeds, rule = 2)$y
  }
  result
}
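For reference, approx with rule = 2 returns the value at the nearest data point outside the range of the input (constant extrapolation), so constraint values beyond the largest observed \(\ell_1\)-norm inherit the value supplied for the MLE. A toy illustration with hypothetical numbers:

approx(x = c(1, 2, 4), y = c(10, 20, 40), xout = c(0, 3, 5), rule = 2)$y
## [1] 10 30 40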
## First we compute a preliminary interpolation on the interval (0, smax)
## to determine a suitable range of the constraint s.
smin <- 0
smax <- quantile(oneNormMLE, 0.75)
fixeds <- seq(smin, smax, length.out = 40)
riskTrueInter <- inter(riskTrue, fixeds, riskMLE)
riskInter <- colMeans(riskTrueInter)
riskMax <- 3 * min(riskInter)
smin <- min(fixeds[riskInter < riskMax])
## Having selected the final range of the constraint s, we compute the
## final interpolation at a fixed set of 40 values and average across
## the replications.
fixeds <- seq(smin, smax, length.out = 40)
riskTrueMean <- colMeans(inter(riskTrue, fixeds, riskMLE))
riskHatInter <- inter(riskHat, fixeds, riskHatMLE)
riskHatMean <- colMeans(riskHatInter)
riskTildeInter <- inter(riskTilde, fixeds, riskHatMLE)
riskTildeMean <- colMeans(riskTildeInter)
## Restrict the thresholded estimator to the same range
index <- riskThresMean < riskMax & oneNormThres >= smin & oneNormThres <= smax
if (any(index)) {
  riskThresMean <- riskThresMean[index]
  oneNormThres <- oneNormThres[index]
} else {
  riskThresMean <- riskThresMean[1]
  oneNormThres <- oneNormThres[1]
}
Based on the SURE estimate of the risk, we choose the constraint by minimizing the estimated risk. For this data-adaptive choice of the constraint, we compute the actual loss and its average across the replications (an estimate of the risk of the estimator obtained by adaptive constraint selection). We also compute the average of the SURE minimum, which we expect to be a negatively biased estimate of the actual risk of the adaptive estimator.
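## For each replication, pick the constraint index that minimizes the SURE
## curve, then read off the replication-averaged true risk at that index.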
adptRisk <- mean(riskTrueMean[apply(riskHatInter, 1, which.min)])
adptRiskHat <- mean(apply(riskHatInter, 1, min))
The following plot shows how the \(\ell_1\)-constrained estimators perform in terms of their risk, and how they compare to the MLE and the thresholded estimators. It also shows the averages of the two risk estimates, which are to be compared with the actual risk. Finally, it shows the risk of the estimator based on the adaptive choice of the constraint.
## Manually constructed legend
scaleGuide <- scale_color_manual(
  values = c("orange", "green", "blue", "purple", "purple", "orange", "green"),
  guide = guide_legend(NULL, label.position = "top",
                       keywidth = 3.6, label.hjust = 0.5,
                       override.aes = list(
                         linetype = c(1, 0, 0, 2, 0, 2, 2),
                         shape = c(NA, 19, 19, NA, 19, NA, NA))),
  labels = c(expression(paste("Risk(s)")),
             expression(paste("E(", widehat(Risk), "(s))")),
             expression(paste("E(", widetilde(Risk), "(s))")),
             "Risk (MLE)",
             "Risk (thres)",
             expression(paste("Risk (", hat(B)[hat(s)], ")")),
             expression(paste("E(", widehat(Risk), "(", hat(s), "))")))
)
select <- seq(1, length(fixeds), by = 2)
qplot(fixeds, riskTrueMean, color = factor("A"), geom = "line", size = I(1)) +
  geom_hline(aes(yintercept = riskThresMean[1], color = factor("S")),
             size = 1, linetype = 2) +
  geom_hline(aes(yintercept = adptRisk, color = factor("U")),
             size = 1, linetype = 2) +
  geom_hline(aes(yintercept = adptRiskHat, color = factor("V")),
             size = 1, linetype = 2) +
  geom_point(aes(x = fixeds[select], y = riskTildeMean[select],
                 color = factor("C")), size = 4) +
  geom_point(aes(x = fixeds[select], y = riskHatMean[select],
                 color = factor("B")), size = 3) +
  geom_point(aes(x = oneNormThres, y = riskThresMean, color = factor("T")),
             size = 3) +
  scaleGuide +
  expand_limits(y = 0) +
  xlab("s") +
  ylab("Risk") +
  theme(legend.position = "top")
The next figure zooms in on the possible bias of the risk estimates. Recall that one estimate is based on an unbiased estimate of the Stein degrees of freedom, whereas the other is based on an approximation. In either case, the main result, Theorem 2, states that the Stein degrees of freedom is, in general, smaller than the effective degrees of freedom for nonlinear least squares.
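Schematically, with \(\hat{Y}\) the fitted values (see the paper for the precise statement and regularity conditions):

\[
\mathrm{df}_{\mathrm{Stein}} = E\Big[ \sum_{i} \frac{\partial \hat{Y}_i}{\partial Y_i} \Big] \;\le\; \frac{1}{\sigma^2} \sum_{i} \mathrm{Cov}(\hat{Y}_i, Y_i) = \mathrm{df}_{\mathrm{eff}}.
\]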
n0 <- length(riskTrueMean)
riskHatsd <- apply(riskHatInter, 2, sd)
riskTildesd <- apply(riskTildeInter, 2, sd)
biasData <- data.frame(
  s = c(fixeds, fixeds),
  risk = c(riskHatMean, riskTildeMean) - riskTrueMean,
  type = rep(c("riskHat", "riskTilde"), each = n0),
  low = -1.96 * c(riskHatsd, riskTildesd) / sqrt(config$N),
  up = 1.96 * c(riskHatsd, riskTildesd) / sqrt(config$N)
)
ggplot(data = biasData, aes(s, risk, fill = type)) +
  geom_hline(yintercept = 0, color = "orange", size = 1) +
  geom_ribbon(aes(ymax = risk + up, ymin = risk + low), alpha = 0.2) +
  geom_point(aes(color = type)) +
  scale_color_manual(values = c("green", "blue"), guide = "none") +
  scale_fill_manual(values = c("green", "blue"), guide = "none") +
  ylab("Risk bias") +
  theme(legend.position = "top") +
  facet_grid(type ~ ., labeller = function(...)
    c(expression(paste(widehat(Risk), "(s)")),
      expression(paste(widetilde(Risk), "(s)")))
  )
The estimator of the rate matrix \(B\) gives rise to an estimator of the structure of non-zero entries. This is not discussed in detail in the paper. Here we compute the structure estimates using the estimator that minimizes the estimated risk, tabulate the distribution of its number of non-zero entries (its sparsity), and cross-tabulate the estimated and true non-zero entries.
strHat <- vector("list", config$N)
for (i in 1:config$N) {
  strHat[[i]] <- smdeModels[[i]]$Blambda[, which.min(riskHat[i, ])] != 0
}
adptnz <- table(sapply(strHat, sum)) / config$N
qplot(as.numeric(names(adptnz)), as.numeric(adptnz),
      geom = "bar", width = I(0.8), stat = "identity") +
  xlab("Number of non-zero parameters") +
  ylab("Relative frequency")
## Cross-tabulation of estimated and true non-zero entries.
addmargins(table(as.numeric(unlist(strHat)),
                 rep(as.numeric(config$B != 0), config$N))) / config$N
##
## 0 1 Sum
## 0 279.53 8.72 288.25
## 1 77.47 34.28 111.75
## Sum 357.00 43.00 400.00
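In the cross-tabulation, rows give the estimated non-zero indicator and columns the true indicator, with entries averaged over the replications: on average 34.28 of the 43 truly non-zero entries are correctly picked up, while 77.47 of the 357 true zeros are falsely included.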
timings <- t(sapply(smdeModels, function(model) model$time))[, c(1, 3)]
## Treating extreme elapsed times specially
tmax <- 2 * quantile(timings[, "elapsed"], 0.95)
filter <- timings[, "elapsed"] < tmax
timingsSmall <- melt(as.data.frame(timings[filter, ]))
timingsLarge <- melt(as.data.frame(timings[!filter, ]))
p1 <- ggplot(aes(x = value), data = timingsSmall) +
  geom_histogram(binwidth = 0.1) + xlab("seconds") +
  facet_wrap(~ variable, ncol = 1)
if (all(filter)) {
  p1
} else {
  p2 <- ggplot(aes(x = value), data = timingsLarge) +
    geom_histogram(binwidth = 0.1) + xlab("seconds") +
    facet_wrap(~ variable, ncol = 1) + scale_x_log10()
  grid.arrange(p1, p2, ncol = 2)
}
There were 0 extreme times.
sessionInfo()
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-unknown-linux-gnu (64-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] reshape2_1.2.2 gridExtra_0.9.1 ggplot2_0.9.3.1 smde_0.3
## [5] Rcpp_0.11.0 expm_0.99-1 Matrix_1.0-14 lattice_0.20-23
## [9] xtable_1.7-1 markdown_0.6.4 knitr_1.5 digest_0.6.4
##
## loaded via a namespace (and not attached):
## [1] codetools_0.2-8 colorspace_1.2-4 dichromat_2.0-0
## [4] evaluate_0.5.1 formatR_0.10 gtable_0.1.2
## [7] labeling_0.2 MASS_7.3-29 munsell_0.4.2
## [10] plyr_1.8 proto_0.3-10 RColorBrewer_1.0-5
## [13] scales_0.2.3 stringr_0.6.2 tools_3.0.2