Reporting a possible inconsistency or bug in handling stringsAsFactors in as.data.frame.table()
Here is a simple test: > options()$stringsAsFactors [1] TRUE > x<-c("a","b","c","a","b") > d<-as.data.frame(table(x)) > d x Freq 1 a 2 2 b 2 3 c 1 > class(d$x) [1] "factor" > d2<-as.data.frame(table(x),stringsAsFactors=F) > class(d2$x) [1] "character" > options(stringsAsFactors=F) > options()$stringsAsFactors [1] FALSE > d3<-as.data.frame(table(x)) > d3 x Freq 1 a 2 2 b 2 3 c 1 > class(d3$x) [1] "factor" > d4<-as.data.frame(table(x),stringsAsFactors=F) > class(d4$x) [1] "character" # Display the code showing the different stringsAsFactors handling in table and matrix: > as.data.frame.table function (x, row.names = NULL, ..., responseName = "Freq", stringsAsFactors = TRUE, sep = "", base = list(LETTERS)) { ex <- quote(data.frame(do.call("expand.grid", c(dimnames(provideDimnames(x, sep = sep, base = base)), KEEP.OUT.ATTRS = FALSE, stringsAsFactors = stringsAsFactors)), Freq = c(x), row.names = row.names)) names(ex)[3L] <- responseName eval(ex) } <bytecode: 0x28769f8> <environment: namespace:base> > as.data.frame.matrix function (x, row.names = NULL, optional = FALSE, make.names = TRUE, ..., stringsAsFactors = default.stringsAsFactors()) { d <- dim(x) nrows <- d[[1L]] ncols <- d[[2L]] ic <- seq_len(ncols) dn <- dimnames(x) if (is.null(row.names)) row.names <- dn[[1L]] collabs <- dn[[2L]] if (any(empty <- !nzchar(collabs))) collabs[empty] <- paste0("V", ic)[empty] value <- vector("list", ncols) if (mode(x) == "character" && stringsAsFactors) { for (i in ic) value[[i]] <- as.factor(x[, i]) } else { for (i in ic) value[[i]] <- as.vector(x[, i]) } autoRN <- (is.null(row.names) || length(row.names) != nrows) if (length(collabs) == ncols) names(value) <- collabs else if (!optional) names(value) <- paste0("V", ic) class(value) <- "data.frame" if (autoRN) attr(value, "row.names") <- .set_row_names(nrows) else .rowNamesDF(value, make.names = make.names) <- row.names value } <bytecode: 0x29995c0> <environment: namespace:base> > sessionInfo() R version 3.5.2 (2018-12-20) 
Platform: x86_64-pc-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS: /usr/lib64/libblas.so.3.4.2 LAPACK: /usr/lib64/liblapack.so.3.4.2 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats graphics grDevices utils datasets methods base loaded via a namespace (and not attached): [1] compiler_3.5.2 tools_3.5.2 Thanks, Joe [[alternative HTML version deleted]] ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel