Thanks again for all the help. With it, I was able to write the function I need:

namax <- function(m, mp) {
  # Iteratively remove the row or column with the highest share of NA values
  # until every remaining row and column is below the threshold.
  #
  # Arguments:
  #   m  - matrix (or other two-dimensional object such as a data frame)
  #   mp - maximum percentage (0-100) of NA values allowed in rows/columns.
  #        The comparison is strict: pruning stops only once every row and
  #        column contains less than mp percent NAs.
  #
  # Returns:
  #   The pruned object. It always keeps two dimensions, even when only a
  #   single row or column (or nothing at all) survives.
  #
  # Side effects:
  #   Prints one line per removed row/column and a final summary via cat().
  stopifnot(
    length(dim(m)) == 2L,
    is.numeric(mp), length(mp) == 1L, !is.na(mp)
  )

  limit <- mp / 100
  rows_removed <- 0
  cols_removed <- 0

  # Label used in the progress message: the dimname with trailing whitespace
  # stripped, or the position in the current (already pruned) object when
  # there are no dimnames.
  label_of <- function(idx) {
    nm <- names(idx)
    if (is.null(nm)) as.character(idx) else sub("[[:space:]]+$", "", nm)
  }

  repeat {
    # Nothing left to inspect; max() on an empty vector would warn and
    # return -Inf, so stop explicitly.
    if (nrow(m) == 0L || ncol(m) == 0L) {
      break
    }

    na_mask <- is.na(m)
    row_na <- rowMeans(na_mask)
    col_na <- colMeans(na_mask)
    worst_row <- max(row_na)
    worst_col <- max(col_na)

    if (worst_row < limit && worst_col < limit) {
      break
    }

    # Ties between the worst row and the worst column remove the column.
    if (worst_row > worst_col) {
      idx <- which.max(row_na)
      cat("Row", label_of(idx), "contains", worst_row * 100, "percent NAs.\n")
      # drop = FALSE keeps a matrix even if a single row remains.
      m <- m[-idx, , drop = FALSE]
      rows_removed <- rows_removed + 1
    } else {
      idx <- which.max(col_na)
      cat("Column", label_of(idx), "contains", worst_col * 100,
          "percent NAs.\n")
      # drop = FALSE keeps a matrix even if a single column remains.
      m <- m[, -idx, drop = FALSE]
      cols_removed <- cols_removed + 1
    }
  }

  cat("Removed", rows_removed, "rows and", cols_removed, "columns.\n")
  m
}

-- 
View this message in context: 
http://www.nabble.com/NA-values-trimming-tp24339399p24352605.html
Sent from the R help mailing list archive at Nabble.com.

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to