An alternative solution that allows you to break it down by categories is in
the functions below.  Comments/suggestions welcome and encouraged.

Note that many thanks are due to those who responded to an earlier post of
mine on a similar topic.

To use (assuming you have a data.frame called pathDist with columns
distances (your numeric data of interest), Capacity, Surface, and SITE
(vectors of categories)):
# Example: summarise pathDist$distances by Surface, Capacity and SITE, then
# write the grouped table out as LaTeX.
# Needs the xtable package loaded (e.g. library(xtable)) and the bytable()
# and latex.table.by() functions defined further down.

# Statistics to compute (as unevaluated names) and their column headings
ops <- c(quote(mean), quote(median), quote(sd), quote(length))
ops.desc <- list(mean = "Mean", median = "Median", sd = "S.D.", length = "N")

# Named list of grouping vectors
pathDist.indices <- list(
    Surface = pathDist$Surface,
    Capacity = pathDist$Capacity,
    Site = pathDist$SITE
)

# One row per combination of grouping levels, one column per statistic
pathDist.bytable <- bytable(
    pathDist$distances, pathDist.indices,
    ops = ops, ops.desc = ops.desc, na.rm = TRUE
)

# Collapse repeated group labels into \multirow cells
pathDist.table <- latex.table.by(
    pathDist.bytable,
    num.by.vars = length(pathDist.indices),
    caption = "Path Characteristics"
)

# sanitize.text.function = force keeps the raw LaTeX commands unescaped
print(
    pathDist.table,
    type = "latex",
    file = "E:/xdrive/projects/Ghana_air/backpack_data/plots/pathDist.table.tex",
    include.rownames = FALSE, include.colnames = TRUE,
    sanitize.text.function = force,
    tabular.environment = 'longtable', floating = FALSE
)





# Make a table by group
#
# Takes a data frame whose first num.by.vars columns are grouping columns and
# returns the result of xtable() on a copy in which
#   - each run of identical group labels is replaced by one \multirow cell
#     (spanning the run) followed by empty cells, and
#   - the first column of every row is prefixed with a \cline command that
#     starts at the outermost grouping column whose run begins on that row
#     (or at the first non-grouping column if no run begins there).
#
# Usage:
#   print(latex.table.by(test.df), include.rownames = FALSE,
#         include.colnames = TRUE, sanitize.text.function = force)
#   then add \usepackage{multirow} to the preamble of your LaTeX document
#   for longtable support, add ,tabular.environment='longtable' to the print
#   command (plus add in ,floating=FALSE), then \usepackage{longtable} to the
#   LaTeX preamble
#
# Arguments:
#   df           data frame; the first num.by.vars columns must be sorted and
#                in descending order of priority
#   num.by.vars  number of leading grouping columns (a single number)
#   ...          passed on to xtable()
#
# Value: whatever xtable(df, align = ..., ...) returns.
latex.table.by <- function(df, num.by.vars = 1, ...) {
    # Scalar condition, so use the short-circuit || rather than elementwise |
    if (!is.numeric(num.by.vars) || length(num.by.vars) != 1 ||
        is.na(num.by.vars)) {
        stop("num.by.vars must be a number")
    }
    if (num.by.vars < 0 || num.by.vars > ncol(df)) {
        stop("num.by.vars must be between 0 and the number of columns of df")
    }

    # seq_len() yields an empty vector for 0, unlike 1:0 which gives c(1, 0)
    by.vars <- seq_len(num.by.vars)

    numcols <- ncol(df)
    nrows <- nrow(df)
    df.original <- df

    # Start column of the \cline for each row; defaults to the first
    # non-grouping column
    clines <- rep(num.by.vars + 1, nrows)

    # - Make grouping columns multirow - #
    # Work from the innermost grouping column outwards so that the outermost
    # column starting a run on a row is the one left in clines
    for (b in rev(by.vars)) {
        # Key identifying the combination of all grouping columns up to b.
        # A separator is used so that e.g. ("a", "bc") and ("ab", "c") do not
        # produce the same key.
        keys <- do.call(
            paste,
            c(unname(as.list(df.original[seq_len(b)])), sep = "\r")
        )

        # Rows that start a run, derived from the same rle() that supplies
        # the run lengths so the two always have matching lengths
        runs <- rle(keys)$lengths
        first <- seq_len(nrows) %in% (cumsum(runs) - runs + 1L)

        # Put the multirow code on the first row of each run, blank the rest
        labels <- as.character(df[[b]])
        labels[!first] <- ""
        labels[first] <- paste("\\multirow{", runs, "}{*}{", labels[first], "}")
        df[[b]] <- labels

        # Store this by.var's information in the clines variable
        clines[first] <- b
    }

    # Horizontal line on every row, from that row's cline start column to the
    # last column
    df[[1]] <- paste("\\cline{", clines, "-", numcols, "}", df[[1]], sep = "")

    # Column alignment string: centred grouping columns (plus one entry for
    # the row-names column), right-aligned remaining columns
    align.by.vars <- paste(rep("|c", length(by.vars) + 1), collapse = "")
    align.other.vars <- paste(rep("r|", numcols - length(by.vars)),
                              collapse = "")
    align.df <- paste("|", align.by.vars, "|", align.other.vars, sep = "")

    xtable(df, align = align.df, ...)
}


# Summarise a vector by every combination of grouping levels
#
# Arguments:
#   datavec   vector of data to summarise
#   indices   list of grouping vectors, just like you would pass to -by-,
#             but with sensible names for each vector
#   ops       list of unevaluated function names, e.g. c(quote(mean), quote(sd))
#   ops.desc  named list mapping each operation name to its column heading;
#             operations without an entry use their own name as heading
#   na.rm     if TRUE, drop rows in which every computed statistic is NA
#             (e.g. combinations of levels that do not occur in the data)
#   ...       currently unused
#
# Value: a data frame with one row per combination of grouping levels. The
# grouping columns come first, in the reverse order of indices, followed by
# one column per operation.
bytable <- function(datavec, indices, ops = c(quote(mean)),
                    ops.desc = list(mean = "Mean"), na.rm = TRUE, ...) {
    if (!is.list(indices)) {
        stop("indices needs to be a list")
    }
    # Allow a single bare operation such as ops = quote(mean)
    if (!is.list(ops)) {
        ops <- list(ops)
    }

    # Run one -by- just to obtain the level names in the order -by- uses
    level.names <- dimnames(by(datavec, indices, length))

    # expand.grid varies its first factor fastest, which is the order in
    # which as.numeric() flattens a -by- result. Reversing the columns puts
    # the slowest-varying grouping variable first.
    level.grid <- expand.grid(level.names, KEEP.OUT.ATTRS = FALSE,
                              stringsAsFactors = FALSE)
    by.df <- level.grid[rev(seq_along(level.grid))]
    if (!is.null(names(indices))) {
        colnames(by.df) <- rev(names(indices))
    }
    n.index.cols <- ncol(by.df)

    # Run -by- for each operation, appending one column per operation
    for (op in ops) {
        op.name <- paste(deparse(op), collapse = " ")
        # Fall back to the operation's own name when no heading was supplied
        heading <- if (op.name %in% names(ops.desc)) {
            ops.desc[[op.name]]
        } else {
            op.name
        }
        op.fun <- eval(op)
        by.df[[ncol(by.df) + 1L]] <- as.numeric(by(datavec, indices, op.fun))
        colnames(by.df)[ncol(by.df)] <- heading
    }

    stat.cols <- setdiff(seq_len(ncol(by.df)), seq_len(n.index.cols))
    if (na.rm && length(stat.cols) > 0) {
        # Drop a row only when all statistics are NA, so that a row is not
        # lost just because one statistic (e.g. sd of a single value) is NA
        all.missing <- rowSums(!is.na(by.df[stat.cols])) == 0
        by.df <- by.df[!all.missing, , drop = FALSE]
    }

    by.df
}

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to