Dear R-devel, I've made some potential extensions to writeForeignSAS in 'foreign' that I wanted to pass along if anyone is interested. I've attached the diff -u output against the version found in foreign_0.8-15 and an .R file with my changes. (In this .R file, the function is named writeForeignSAS7 to simplify testing/comparisons.)
I've tried to alter the current version as little as possible while making the following changes: * Try to convert data.frame names to SAS-legal names and allow the user to specify an 8- or 32-character limit. * For factors, try to convert the variable name to a SAS-legal 8-character name not ending in a digit * Read in 'datafile' with DSD specified in the INFILE statement. SAS says this "changes how SAS treats delimiters when list input is used and sets the default delimiter to a comma. When you specify DSD, SAS treats two consecutive delimiters as a missing value and removes quotation marks from character values." The point of this is the added safety of using 'quote=TRUE' when writing 'datafile' via write.table * Functionality to write out Dates and read them in with an INFORMAT statement * Functionality to write out datetime variables (assuming a class of POSIXct) and read them in with an INFORMAT statement * In order to handle character variables a bit better, use a LENGTH statement to tell SAS the maximum character width of values in the variable. Without this, some character values can be truncated. If it'd be helpful to make any changes or add anything, I'd be happy try to do so. Finally, some testing code that works in SAS 6.12, 8.2, and 9. d <- structure(list(a.b = as.integer(c(1, 2)), alphabetsoup = structure(as.integer(c(1, 2)), .Label = c("A", "B"), class = "factor"), datevar1 = structure(c(13342, 12977), class = "Date"), datetimevar1 = structure(c(1152802685, 1152716285), class = c("POSIXt", "POSIXct")), charactervariable = c("L", "Last, First")), .Names = c("a.b", "alphabetsoup", "datevar1", "datetimevar1", "charactervariable"), row.names = c("1", "2"), class = "data.frame") require(foreign) ### adQuote here to (temporarily) avoid ':::' adQuote <- function (x) paste("\"", x, "\"", sep = "") dfile <- file.path(tempdir(), "test.dat") cfile <- file.path(tempdir(), "test.sas") write.foreign(d, datafile = dfile, codefile = cfile, package = "SAS7", validvarname = "V6") file.show(dfile) file.show(cfile) Sincerely, Stephen :::::::::::::::::::::::::::::::::: Stephen Weigand Division of Biostatistics Mayo Clinic Rochester, Minn., USA Phone (507) 266-1650, fax 284-9542
--- writeForeignSAS.R Fri Feb 17 03:30:53 2006 +++ /tmp/writeForeignSAS.R Thu Jul 13 12:24:24 2006 @@ -1,21 +1,52 @@ -writeForeignSAS<-function(df,datafile,codefile,dataname="rdata"){ +make.SAS.names <- function(varnames, validvarname = c("V7", "V6")){ + validvarname <- match.arg(validvarname) + nmax <- if(validvarname == "V7") 32 else 8 + x <- sub("^([0-9])", "_\\1", varnames) + x <- gsub("[^a-zA-Z0-9_]", "_", x) + x <- abbreviate(x, minlength = nmax) + + if (any(nchar(x) > nmax) || any(duplicated(x))) + stop("Cannot uniquely abbreviate the variable names to ", + nmax, " or fewer characters") + names(x) <- varnames + x +} + +make.SAS.formats <- function(varnames){ + x <- sub("^([0-9])", "_\\1", varnames) + x <- gsub("[^a-zA-Z0-9_]", "_", x) + x <- sub("([0-9])$", "\\1f", x) # can't end in digit so append 'f' + x <- abbreviate(x, minlength = 8) + + if(any(nchar(x) > 8) || any(duplicated(x))) + stop("Cannot uniquely abbreviate format names to conform to ", + " eight-character limit and not ending in a digit") + names(x) <- varnames + x +} + +writeForeignSAS7<-function(df,datafile,codefile,dataname="rdata", + validvarname = c("V7", "V6")){ factors <- sapply(df, is.factor) strings <- sapply(df, is.character) - + dates <- sapply(df, FUN = function(x) inherits(x, "Date")) + datetimes <- sapply(df, FUN = function(x) inherits(x, "POSIXct")) + varlabels <- names(df) - varnames <- abbreviate(names(df), 8) - if (any(sapply(varnames, nchar) > 8)) - stop("Cannot abbreviate the variable names to eight or fewer letters") - if (any(abbreviated <- (varnames != varlabels))) - message("Some variable names were abbreviated.") + varnames <- make.SAS.names(names(df), validvarname = validvarname) + if (any(varnames != varlabels)) + message("Some variable names were abbreviated or otherwise altered.") dfn<-df if (any(factors)) dfn[factors]<-lapply(dfn[factors], as.numeric) + if (any(datetimes)) + dfn[datetimes] <- lapply(dfn[datetimes], + FUN = function(x) format(x, "%d%b%Y %H:%M:%S")) write.table(dfn, file = datafile, row = FALSE, col = FALSE, - sep = ",", quote = FALSE, na = ".") + sep = ",", quote = TRUE, na = "") lrecl<-max(sapply(readLines(datafile),nchar))+4 cat("* Written by R;\n", file=codefile) @@ -22,24 +53,50 @@ cat("* ",deparse(sys.call(-2))[1],";\n\n",file=codefile,append=TRUE) if (any(factors)){ cat("PROC FORMAT;\n",file=codefile,append=TRUE) - for(v in 1:ncol(df)){ - if (factors[v]){ - cat("value ",varnames[v],"\n",file=codefile,append=TRUE) - values<-levels(df[[v]]) + fmtnames <- make.SAS.formats(varnames[factors]) + fmt.values <- lapply(df[, factors, drop = FALSE], levels) + names(fmt.values) <- fmtnames + for (f in fmtnames){ + cat("value",f,"\n",file=codefile,append = TRUE) + values<-fmt.values[[f]] for(i in 1:length(values)){ cat(" ",i,"=",adQuote(values[i]),"\n",file=codefile,append=TRUE) } cat(";\n\n",file=codefile,append=TRUE) - } - } + } } cat("DATA ",dataname,";\n",file=codefile,append=TRUE) + + if (any(strings)){ + cat("LENGTH", file = codefile, append = TRUE) + lengths <- sapply(df[,strings, drop = FALSE], + FUN = function(x) max(nchar(x))) + names(lengths) <- varnames[strings] + for(v in varnames[strings]) + cat("\n", v, "$", lengths[v],file=codefile,append=TRUE) + cat("\n;\n\n", file = codefile, append = TRUE) + } + + if (any(dates)){ + cat("INFORMAT", file = codefile, append = TRUE) + for(v in varnames[dates]) + cat("\n", v, file = codefile, append = TRUE) + cat("\n YYMMDD10.\n;\n\n", file = codefile, append = TRUE) + } + + if (any(datetimes)){ + cat("INFORMAT", file = codefile, append = TRUE) + for(v in varnames[datetimes]) + cat("\n", v, file = codefile, append = TRUE) + cat("\n DATETIME18.\n;\n\n", file = codefile, append = TRUE) + } + cat("INFILE ",adQuote(datafile), - "\n DELIMITER=','", + "\n DSD", "\n LRECL=",lrecl,";\n", file=codefile,append=TRUE) - + cat("INPUT",file=codefile,append=TRUE) for(v in 1:ncol(df)){ cat("\n",varnames[v],file=codefile,append=TRUE) @@ -49,16 +106,26 @@ cat("\n;\n",file=codefile,append=TRUE) for(v in 1:ncol(df)){ - if (abbreviated[v]) + if (varnames[v] != names(varnames)[v]) cat("LABEL ",varnames[v],"=",adQuote(varlabels[v]),";\n", file=codefile,append=TRUE) - } - - for(v in 1:ncol(df)){ - if(factors[v]) - cat("FORMAT ",varnames[v],paste(varnames[v],".",sep=""),";\n", + } + + if (any(factors)){ + for (f in 1:length(fmtnames)) + cat("FORMAT", names(fmtnames)[f],paste(fmtnames[f],".",sep = ""),";\n", file=codefile,append=TRUE) } - + + if (any(dates)){ + for(v in varnames[dates]) + cat("FORMAT", v, "yymmdd10.;\n", file = codefile, append = TRUE) + } + + if (any(datetimes)){ + for(v in varnames[datetimes]) + cat("FORMAT", v, "datetime18.;\n", file = codefile, append = TRUE) + } + cat("RUN;\n",file=codefile,append=TRUE) }
make.SAS.names <- function(varnames, validvarname = c("V7", "V6")){ validvarname <- match.arg(validvarname) nmax <- if(validvarname == "V7") 32 else 8 x <- sub("^([0-9])", "_\\1", varnames) x <- gsub("[^a-zA-Z0-9_]", "_", x) x <- abbreviate(x, minlength = nmax) if (any(nchar(x) > nmax) || any(duplicated(x))) stop("Cannot uniquely abbreviate the variable names to ", nmax, " or fewer characters") names(x) <- varnames x } make.SAS.formats <- function(varnames){ x <- sub("^([0-9])", "_\\1", varnames) x <- gsub("[^a-zA-Z0-9_]", "_", x) x <- sub("([0-9])$", "\\1f", x) # can't end in digit so append 'f' x <- abbreviate(x, minlength = 8) if(any(nchar(x) > 8) || any(duplicated(x))) stop("Cannot uniquely abbreviate format names to conform to ", " eight-character limit and not ending in a digit") names(x) <- varnames x } writeForeignSAS7<-function(df,datafile,codefile,dataname="rdata", validvarname = c("V7", "V6")){ factors <- sapply(df, is.factor) strings <- sapply(df, is.character) dates <- sapply(df, FUN = function(x) inherits(x, "Date")) datetimes <- sapply(df, FUN = function(x) inherits(x, "POSIXct")) varlabels <- names(df) varnames <- make.SAS.names(names(df), validvarname = validvarname) if (any(varnames != varlabels)) message("Some variable names were abbreviated or otherwise altered.") dfn<-df if (any(factors)) dfn[factors]<-lapply(dfn[factors], as.numeric) if (any(datetimes)) dfn[datetimes] <- lapply(dfn[datetimes], FUN = function(x) format(x, "%d%b%Y %H:%M:%S")) write.table(dfn, file = datafile, row = FALSE, col = FALSE, sep = ",", quote = TRUE, na = "") lrecl<-max(sapply(readLines(datafile),nchar))+4 cat("* Written by R;\n", file=codefile) cat("* ",deparse(sys.call(-2))[1],";\n\n",file=codefile,append=TRUE) if (any(factors)){ cat("PROC FORMAT;\n",file=codefile,append=TRUE) fmtnames <- make.SAS.formats(varnames[factors]) fmt.values <- lapply(df[, factors, drop = FALSE], levels) names(fmt.values) <- fmtnames for (f in fmtnames){ cat("value",f,"\n",file=codefile,append = TRUE) values<-fmt.values[[f]] for(i in 1:length(values)){ cat(" ",i,"=",adQuote(values[i]),"\n",file=codefile,append=TRUE) } cat(";\n\n",file=codefile,append=TRUE) } } cat("DATA ",dataname,";\n",file=codefile,append=TRUE) if (any(strings)){ cat("LENGTH", file = codefile, append = TRUE) lengths <- sapply(df[,strings, drop = FALSE], FUN = function(x) max(nchar(x))) names(lengths) <- varnames[strings] for(v in varnames[strings]) cat("\n", v, "$", lengths[v],file=codefile,append=TRUE) cat("\n;\n\n", file = codefile, append = TRUE) } if (any(dates)){ cat("INFORMAT", file = codefile, append = TRUE) for(v in varnames[dates]) cat("\n", v, file = codefile, append = TRUE) cat("\n YYMMDD10.\n;\n\n", file = codefile, append = TRUE) } if (any(datetimes)){ cat("INFORMAT", file = codefile, append = TRUE) for(v in varnames[datetimes]) cat("\n", v, file = codefile, append = TRUE) cat("\n DATETIME18.\n;\n\n", file = codefile, append = TRUE) } cat("INFILE ",adQuote(datafile), "\n DSD", "\n LRECL=",lrecl,";\n", file=codefile,append=TRUE) cat("INPUT",file=codefile,append=TRUE) for(v in 1:ncol(df)){ cat("\n",varnames[v],file=codefile,append=TRUE) if(strings[v]) cat(" $ ",file=codefile,append=TRUE) } cat("\n;\n",file=codefile,append=TRUE) for(v in 1:ncol(df)){ if (varnames[v] != names(varnames)[v]) cat("LABEL ",varnames[v],"=",adQuote(varlabels[v]),";\n", file=codefile,append=TRUE) } if (any(factors)){ for (f in 1:length(fmtnames)) cat("FORMAT", names(fmtnames)[f],paste(fmtnames[f],".",sep = ""),";\n", file=codefile,append=TRUE) } if (any(dates)){ for(v in varnames[dates]) cat("FORMAT", v, "yymmdd10.;\n", file = codefile, append = TRUE) } if (any(datetimes)){ for(v in varnames[datetimes]) cat("FORMAT", v, "datetime18.;\n", file = codefile, append = TRUE) } cat("RUN;\n",file=codefile,append=TRUE) }
______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel