When attempting to merge 3 data frames, one of which has fewer columns than the others, rbind.data.frame correctly refuses to perform the bind. However, the error message given is a bit obscure due to a logical bug in the match.names() internal function to rbind.data.frame.
Illustration: ## Three data frames with same column variable names: > foo <- data.frame(v1 = c('a', 'b'), v2 = c(1, 2), v3 = ordered(c('x', 'y'))) > bar <- data.frame(v2 = c(3, 4), v3 = ordered(c('x', 'y')), v1 = c('c', 'd')) > baz <- data.frame(v1 = c('a', 'e'), v2 = c(5, 6), v3 = ordered(c('x', 'z'))) > rbind(foo, bar, baz) v1 v2 v3 1 a 1 x 2 b 2 y 3 c 3 x 4 d 4 y 5 a 5 x 6 e 6 z > ## All is fine. ## Third data frame has a different third column variable > fifi <- data.frame(v1 = c('a', 'e'), v2 = c(9, 10), v4 = ordered(c('y', 'z'))) > rbind(foo, bar, fifi) Error in match.names(clabs, names(xi)) : names do not match previous names: v4 > ## Output looks fine. ## Third data frame is missing a column. > gaga <- data.frame(v1 = c('a', 'e'), v2 = c(7, 8)) > rbind(foo, bar, gaga) Error in paste(nmi[nii == 0], collapse = ", ") : object "nii" not found In addition: Warning message: longer object length is not a multiple of shorter object length in: clabs == nmi > This somewhat cryptic error message results because the second portion of the second if() test clause in the match.names() function is not evaluated, as it is the second argument to function '&&' and the first argument (length(nmi) == length(clabs)) evaluates to FALSE. Thus variable nii is not defined and the stop() command at the end of match.names() throws an (unintended?) error. If I switch the order of these two arguments to '&&' (see my test function sm.rbind.data.frame below) I get what appears to be the intended output given an input data frame with a missing column (relative to the prior data frames): > sm.rbind.data.frame(foo, bar, gaga) Error in match.names(clabs, names(xi)) : names do not match previous names: In addition: Warning message: longer object length is not a multiple of shorter object length in: clabs == nmi Is this a possible fix for rbind.data.frame, or would the changed order break something else? ## Modified function handles all the above test cases properly > sm.rbind.data.frame(foo, bar, baz) v1 v2 v3 1 a 1 x 2 b 2 y 3 c 3 x 4 d 4 y 5 a 5 x 6 e 6 z > sm.rbind.data.frame(foo, bar, fifi) Error in match.names(clabs, names(xi)) : names do not match previous names: v4 > sm.rbind.data.frame(foo, bar, gaga) Error in match.names(clabs, names(xi)) : names do not match previous names: In addition: Warning message: longer object length is not a multiple of shorter object length in: clabs == nmi > ### rbind.data.frame with modified match.names() sm.rbind.data.frame <- function (..., deparse.level = 1) { match.names <- function(clabs, nmi) { if (all(clabs == nmi)) NULL ### Switched order of args to '&&' in following if() test clause else if (all(nii <- match(nmi, clabs, 0)) && length(nmi) == length(clabs)) { m <- pmatch(nmi, clabs, 0) if (any(m == 0)) stop("names do not match previous names") m } else stop("names do not match previous names:\n\t", paste(nmi[nii == 0], collapse = ", ")) } Make.row.names <- function(nmi, ri, ni, nrow) { if (nchar(nmi) > 0) { if (ni == 0) character(0) else if (ni > 1) paste(nmi, ri, sep = ".") else nmi } else if (nrow > 0 && identical(ri, 1:ni)) as.integer(seq.int(from = nrow + 1, length = ni)) else ri } allargs <- list(...) allargs <- allargs[sapply(allargs, length) > 0] n <- length(allargs) if (n == 0) return(structure(list(), class = "data.frame", row.names = integer())) nms <- names(allargs) if (is.null(nms)) nms <- character(length(allargs)) cl <- NULL perm <- rows <- rlabs <- vector("list", n) nrow <- 0 value <- clabs <- NULL all.levs <- list() for (i in 1:n) { xi <- allargs[[i]] nmi <- nms[i] if (is.matrix(xi)) allargs[[i]] <- xi <- as.data.frame(xi) if (inherits(xi, "data.frame")) { if (is.null(cl)) cl <- oldClass(xi) ri <- attr(xi, "row.names") ni <- length(ri) if (is.null(clabs)) clabs <- names(xi) else { pi <- match.names(clabs, names(xi)) if (!is.null(pi)) perm[[i]] <- pi } rows[[i]] <- seq.int(from = nrow + 1, length = ni) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow) nrow <- nrow + ni if (is.null(value)) { value <- unclass(xi) nvar <- length(value) all.levs <- vector("list", nvar) has.dim <- logical(nvar) facCol <- logical(nvar) ordCol <- logical(nvar) for (j in 1:nvar) { xj <- value[[j]] if (!is.null(levels(xj))) { all.levs[[j]] <- levels(xj) facCol[j] <- TRUE } else facCol[j] <- is.factor(xj) ordCol[j] <- is.ordered(xj) has.dim[j] <- length(dim(xj)) == 2 } } else for (j in 1:nvar) { xij <- xi[[j]] if (is.null(pi) || is.na(jj <- pi[[j]])) jj <- j if (facCol[jj]) { if (length(lij <- levels(xij)) > 0) { all.levs[[jj]] <- unique(c(all.levs[[jj]], lij)) ordCol[jj] <- ordCol[jj] & is.ordered(xij) } else if (is.character(xij)) all.levs[[jj]] <- unique(c(all.levs[[jj]], xij)) } } } else if (is.list(xi)) { ni <- range(sapply(xi, length)) if (ni[1] == ni[2]) ni <- ni[1] else stop("invalid list argument: all variables should have the same length") rows[[i]] <- ri <- as.integer(seq.int(from = nrow + 1, length = ni)) nrow <- nrow + ni rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow) if (length(nmi <- names(xi)) > 0) { if (is.null(clabs)) clabs <- nmi else { tmp <- match.names(clabs, nmi) if (!is.null(tmp)) perm[[i]] <- tmp } } } else if (length(xi) > 0) { rows[[i]] <- nrow <- nrow + 1 rlabs[[i]] <- if (nchar(nmi) > 0) nmi else as.integer(nrow) } } nvar <- length(clabs) if (nvar == 0) nvar <- max(sapply(allargs, length)) if (nvar == 0) return(structure(list(), class = "data.frame", row.names = integer())) pseq <- 1:nvar if (is.null(value)) { value <- list() value[pseq] <- list(logical(nrow)) } names(value) <- clabs for (j in 1:nvar) if (length(lij <- all.levs[[j]]) > 0) value[[j]] <- factor(as.vector(value[[j]]), lij, ordered = ordCol[j]) if (any(has.dim)) { rmax <- max(unlist(rows)) for (i in (1:nvar)[has.dim]) if (!inherits(xi <- value[[i]], "data.frame")) { dn <- dimnames(xi) rn <- dn[[1]] if (length(rn) > 0) length(rn) <- rmax pi <- dim(xi)[2] length(xi) <- rmax * pi value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2]])) } } for (i in 1:n) { xi <- unclass(allargs[[i]]) if (!is.list(xi)) if (length(xi) != nvar) xi <- rep(xi, length.out = nvar) ri <- rows[[i]] pi <- perm[[i]] if (is.null(pi)) pi <- pseq for (j in 1:nvar) { jj <- pi[j] xij <- xi[[j]] if (has.dim[jj]) { value[[jj]][ri, ] <- xij rownames(value[[jj]])[ri] <- rownames(xij) } else { value[[jj]][ri] <- if (is.factor(xij)) as.vector(xij) else xij if (!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm } } } rlabs <- unlist(rlabs) if (any(duplicated(rlabs))) rlabs <- make.unique(as.character(unlist(rlabs)), sep = "") if (is.null(cl)) { as.data.frame(value, row.names = rlabs) } else { class(value) <- cl attr(value, "row.names") <- rlabs value } } Steven McKinney Statistician Molecular Oncology and Breast Cancer Program British Columbia Cancer Research Centre email: [EMAIL PROTECTED] tel: 604-675-8000 x7561 BCCRC Molecular Oncology 675 West 10th Ave, Floor 4 Vancouver B.C. V5Z 1L3 Canada ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel