Hello, I am trying to make a slight modification to the way gam (mgcv) works. I want to modify the G$X matrix, which is the design matrix, to accommodate an estimator that I am trying to program. I am working with panel data, and I want to take all continuous variables, including basis functions evaluated at covariate variables, subtract their groupwise means, and then add in their overall mean. (This is an alternative to having a dummy variable for every single group, and I'm working in a context where random effects aren't appropriate). I'd then do the same thing with G$y. Afterwards I want to pass the modified G$X and G$Y matrices to estimate.gam, which takes G as an argument.
As far as I can tell, G$X is organized to have the parametric terms (including factors) at the front with column names, and nonparametric terms at the end without column names. So identifying which terms to apply the transformation to should be straightforward. I've had some luck tweaking the internals of mgcv before, changing the default plotting behavior of vis.gam. But I am having trouble doing so with gam itself. To test things out, I did the following: * type "gam" into the terminal, to get the printout of the function's code. * add a line below line 70: G$X = G$X^2 (just to see if I can make an arbitrary transformation) * change the function name from gam to mod.gam, run it, and then run mod.gam on an arbitrary dataset. When I do so, I get the following error: Error in mod.gam(y ~ s(x)) : could not find function "variable.summary" It turns out that variable.summary, along with gam.setup, estimate.gam, and perhaps other constituents of gam do NOT have help pages, nor does typing them into the terminal reveal their code. Am I encountering functions compiled from some non-R language (C)? Regardless, I would greatly appreciate any help with my problem of transforming the model matrix before fitting. I'd very much prefer to use mgcv, with all of the work that has obviously gone into it, rather than reinventing the wheel. My code for reproducing the error is below. Thanks in advance for any help. Best, Andrew library(mgcv) gam mod.gam = function (formula, family = gaussian(), data = list(), weights = NULL, subset = NULL, na.action, offset = NULL, method = "GCV.Cp", optimizer = c("outer", "newton"), control = list(), scale = 0, select = FALSE, knots = NULL, sp = NULL, min.sp = NULL, H = NULL, gamma = 1, fit = TRUE, paraPen = NULL, G = NULL, in.out = NULL, ...) { control <- do.call("gam.control", control) if (is.null(G)) { gp <- interpret.gam(formula) cl <- match.call() mf <- match.call(expand.dots = FALSE) mf$formula <- gp$fake.formula mf$family <- mf$control <- mf$scale <- mf$knots <- mf$sp <- mf$min.sp <- mf$H <- mf$select <- mf$gamma <- mf$method <- mf$fit <- mf$paraPen <- mf$G <- mf$optimizer <- mf$in.out <- mf$... <- NULL mf$drop.unused.levels <- TRUE mf[[1]] <- as.name("model.frame") pmf <- mf mf <- eval(mf, parent.frame()) if (nrow(mf) < 2) stop("Not enough (non-NA) data to do anything meaningful") terms <- attr(mf, "terms") vars <- all.vars(gp$fake.formula[-2]) inp <- parse(text = paste("list(", paste(vars, collapse = ","), ")")) if (!is.list(data) && !is.data.frame(data)) data <- as.data.frame(data) dl <- eval(inp, data, parent.frame()) names(dl) <- vars var.summary <- variable.summary(gp$pf, dl, nrow(mf)) rm(dl) pmf$formula <- gp$pf pmf <- eval(pmf, parent.frame()) pterms <- attr(pmf, "terms") if (is.character(family)) family <- eval(parse(text = family)) if (is.function(family)) family <- family() if (is.null(family$family)) stop("family not recognized") if (family$family[1] == "gaussian" && family$link == "identity") am <- TRUE else am <- FALSE if (!control$keepData) rm(data) G <- gam.setup(gp, pterms = pterms, data = mf, knots = knots, sp = sp, min.sp = min.sp, H = H, absorb.cons = TRUE, sparse.cons = 0, select = select, idLinksBases = control$idLinksBases, scale.penalty = control$scalePenalty, paraPen = paraPen) G$var.summary <- var.summary G$family <- family if (ncol(G$X) > nrow(G$X) + nrow(G$C)) stop("Model has more coefficients than data") G$terms <- terms G$pterms <- pterms G$mf <- mf G$cl <- cl G$am <- am if (is.null(G$offset)) G$offset <- rep(0, G$n) G$min.edf <- G$nsdf - dim(G$C)[1] if (G$m) for (i in 1:G$m) G$min.edf <- G$min.edf + G$smooth[[i]]$null.space.dim G$formula <- formula environment(G$formula) <- environment(formula) } if (!fit) return(G) G$conv.tol <- control$mgcv.tol G$max.half <- control$mgcv.half G$X = G$X^2 object <- estimate.gam(G, method, optimizer, control, in.out, scale, gamma, ...) if (!is.null(G$L)) { object$full.sp <- as.numeric(exp(G$L %*% log(object$sp) + G$lsp0)) names(object$full.sp) <- names(G$lsp0) } names(object$sp) <- names(G$sp) object$paraPen <- G$pP object$formula <- G$formula object$var.summary <- G$var.summary object$cmX <- G$cmX object$model <- G$mf object$na.action <- attr(G$mf, "na.action") object$control <- control object$terms <- G$terms object$pterms <- G$pterms object$assign <- G$assign object$contrasts <- G$contrasts object$xlevels <- G$xlevels object$offset <- G$offset if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre if (control$keepData) object$data <- data object$df.residual <- nrow(G$X) - sum(object$edf) object$min.edf <- G$min.edf if (G$am && !(method %in% c("REML", "ML", "P-ML", "P-REML"))) object$optimizer <- "magic" else object$optimizer <- optimizer object$call <- G$cl class(object) <- c("gam", "glm", "lm") object } x = rnorm(100) y = exp(y)+rnorm(100) m = gam(y~s(x)) modm = mod.gam(y~s(x)) [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.