Dear R users, I took a closer look to the AIC-procedure for determining the order m of an VAR(m) process of the function ar() in package stats.
>From the original code (source: >http://cran.r-project.org/src/base/R-2/R-2.15.3.tar.gz -> >R-2.15.3\src\library\stats\R\ar.R -> lines 95 to 98) [...] cal.aic <- function() { # omits mean params, that is constant adj det <- abs(prod(diag(qr(EA)$qr))) return(n.used * log(det) + 2 * m * nser * nser) } [...] With: nser: number of time series n.used: number of observations in one of nser time series m: number of parameters in one single equation out of nser equations of the VAR(m) model This function computes "n.used * log(det)..." but in my oppinion it has to be "nser * n.used * log(det)...". Do I understand here something completely wrong? #Example to show that it makes a difference (I use the original code but shorten it a bit to the multivariate case): #Read in functions var.yw.aic.original() and var.yw.aic.modified() var.yw.aic.original <- function (x) { x <- as.matrix(x) xm <- colMeans(x) x <- sweep(x, 2L, xm, check.margin=FALSE) n.used <- nrow(x) nser <- ncol(x) order.max <- floor(10 * log10(n.used)) xacf <- acf(x, type="covariance", lag.max=order.max, plot=FALSE, demean=TRUE)$acf snames <- colnames(x) A <- B <- array(0, dim = c(order.max + 1L, nser, nser)) A[1L, , ] <- B[1L, , ] <- diag(nser) EA <- EB <- xacf[1L, , , drop = TRUE] xaic <- numeric(order.max + 1L) solve.yw <- function(m) { betaA <- betaB <- 0 for (i in 0L:m) { betaA <- betaA + A[i + 1L, , ] %*% xacf[m + 2L - i, , ] betaB <- betaB + B[i + 1L, , ] %*% t(xacf[m + 2L - i, , ]) } KA <- -t(qr.solve(t(EB), t(betaA))) KB <- -t(qr.solve(t(EA), t(betaB))) EB <<- (diag(nser) - KB %*% KA) %*% EB EA <<- (diag(nser) - KA %*% KB) %*% EA Aold <- A Bold <- B for (i in seq_len(m + 1L)) { A[i + 1L, , ] <<- Aold[i + 1L, , ] + KA %*% Bold[m + 2L - i, , ] B[i + 1L, , ] <<- Bold[i + 1L, , ] + KB %*% Aold[m + 2L - i, , ] } } cal.aic <- function() { det <- abs(prod(diag(qr(EA)$qr))) return(n.used * log(det) + 2 * m * nser * nser) } cal.resid <- function() { resid <- array(0, dim = c(n.used - order, nser)) for (i in 0L:order) resid <- resid + x[(order - i + 1L):(n.used - i), , drop = FALSE] %*% t(ar[i + 1L, , ]) return(rbind(matrix(NA, order, nser), resid)) } order <- 0L for (m in 0L:order.max) { xaic[m + 1L] <- cal.aic() if(xaic[m + 1L] == min(xaic[seq_len(m + 1L)])) { ar <- A order <- m var.pred <- EA * n.used/(n.used - nser * (m + 1L)) } if(m < order.max) solve.yw(m) } xaic <- xaic - min(xaic) names(xaic) <- 0L:order.max resid <- cal.resid() if(order) { ar <- -ar[2L:(order + 1L), , , drop = FALSE] dimnames(ar) <- list(seq_len(order), snames, snames) } else ar <- array(0, dim=c(0L, nser, nser), dimnames=list(NULL, snames, snames)) colnames(resid) <- colnames(x) order } var.yw.aic.modified <- function (x) { x <- as.matrix(x) xm <- colMeans(x) x <- sweep(x, 2L, xm, check.margin=FALSE) n.used <- nrow(x) nser <- ncol(x) order.max <- floor(10 * log10(n.used)) xacf <- acf(x, type="covariance", lag.max=order.max, plot=FALSE, demean=TRUE)$acf snames <- colnames(x) A <- B <- array(0, dim = c(order.max + 1L, nser, nser)) A[1L, , ] <- B[1L, , ] <- diag(nser) EA <- EB <- xacf[1L, , , drop = TRUE] xaic <- numeric(order.max + 1L) solve.yw <- function(m) { betaA <- betaB <- 0 for (i in 0L:m) { betaA <- betaA + A[i + 1L, , ] %*% xacf[m + 2L - i, , ] betaB <- betaB + B[i + 1L, , ] %*% t(xacf[m + 2L - i, , ]) } KA <- -t(qr.solve(t(EB), t(betaA))) KB <- -t(qr.solve(t(EA), t(betaB))) EB <<- (diag(nser) - KB %*% KA) %*% EB EA <<- (diag(nser) - KA %*% KB) %*% EA Aold <- A Bold <- B for (i in seq_len(m + 1L)) { A[i + 1L, , ] <<- Aold[i + 1L, , ] + KA %*% Bold[m + 2L - i, , ] B[i + 1L, , ] <<- Bold[i + 1L, , ] + KB %*% Aold[m + 2L - i, , ] } } cal.aic <- function() { det <- abs(prod(diag(qr(EA)$qr))) return(nser * n.used * log(det) + 2 * m * nser * nser) } cal.resid <- function() { resid <- array(0, dim = c(n.used - order, nser)) for (i in 0L:order) resid <- resid + x[(order - i + 1L):(n.used - i), , drop = FALSE] %*% t(ar[i + 1L, , ]) return(rbind(matrix(NA, order, nser), resid)) } order <- 0L for (m in 0L:order.max) { xaic[m + 1L] <- cal.aic() if(xaic[m + 1L] == min(xaic[seq_len(m + 1L)])) { ar <- A order <- m var.pred <- EA * n.used/(n.used - nser * (m + 1L)) } if(m < order.max) solve.yw(m) } xaic <- xaic - min(xaic) names(xaic) <- 0L:order.max resid <- cal.resid() if(order) { ar <- -ar[2L:(order + 1L), , , drop = FALSE] dimnames(ar) <- list(seq_len(order), snames, snames) } else ar <- array(0, dim=c(0L, nser, nser), dimnames=list(NULL, snames, snames)) colnames(resid) <- colnames(x) order } #Data for the example x <- c(-0.00658, 0.00802, 0.00642, 0.00412, -0.00618, 0.00462, -0.00168, -0.00148, -0.00108, -0.00608, -0.00058, -0.00318, -0.00528, -0.00218, -0.00688, 0.00022, 0.00472, -0.00028, 0.00362, 0.00122, 0.00852, -0.00118) y <- c(-0.0046, 0.0018, 0.0088, 0.0051, -0.0020, 0.0018, 0.0083, -0.0012, 0.0096, -0.0014, -0.0035, -0.0107, -0.0052, -0.0027, -0.0095, 0.0008, 0.0070, -0.0018, 0.0031, -0.0019, 0.0097, -0.0022) #Calculations ar(cbind(x, y))$order #m=0 var.yw.aic.original(cbind(x, y)) #m=0 var.yw.aic.modified(cbind(x, y)) #m=13 Is my understanding of the function calc.aic() inside ar() wrong? I hope someone can help me to understand why ar() uses "n.used" instead of "nser * n.used"? Best Andreas ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.