Thanks again for all the help. With it, I was able to write the function I need:
namax <- function(m, mp) {
  # Iteratively remove the row or column of `m` with the highest share of NA
  # values until every remaining row and column is below the threshold.
  #
  # Arguments:
  #   m:  a matrix (or other two-dimensional object accepted by is.na(),
  #       rowMeans() and colMeans()).
  #   mp: threshold in percent. Trimming stops once the NA share of every
  #       row and every column is strictly below mp / 100.
  #
  # Returns: the trimmed `m`. The result always keeps two dimensions, even
  # when only one row or column (or none) is left.
  #
  # Side effects: prints one line per removed row/column plus a summary line.
  stopifnot(is.numeric(mp), length(mp) == 1L, !is.na(mp))

  limit <- mp / 100
  rows_removed <- 0L
  cols_removed <- 0L

  # Original positions, used to label rows/columns when `m` has no dimnames.
  row_ids <- seq_len(nrow(m))
  col_ids <- seq_len(ncol(m))

  repeat {
    # Stop when nothing is left: max() on empty or NaN means would otherwise
    # produce an NA condition and make the `if` below fail.
    if (nrow(m) == 0L || ncol(m) == 0L) {
      break
    }

    na_mask <- is.na(m)
    row_share <- rowMeans(na_mask)
    col_share <- colMeans(na_mask)
    worst_row <- max(row_share)
    worst_col <- max(col_share)

    if (worst_row < limit && worst_col < limit) {
      break
    }

    if (worst_row > worst_col) {
      # which.max() returns the first maximum and keeps its name.
      idx <- which.max(row_share)
      label <- names(idx)
      if (is.null(label)) {
        label <- as.character(row_ids[idx])
      }
      iname <- sub('[[:space:]]+$', '', label)
      cat("Row", iname, "contains", worst_row * 100, "percent NAs.\n")
      # drop = FALSE keeps `m` two-dimensional when a single row remains.
      m <- m[-idx, , drop = FALSE]
      row_ids <- row_ids[-idx]
      rows_removed <- rows_removed + 1L
    } else {
      # Ties between the worst row and the worst column remove the column.
      idx <- which.max(col_share)
      label <- names(idx)
      if (is.null(label)) {
        label <- as.character(col_ids[idx])
      }
      iname <- sub('[[:space:]]+$', '', label)
      cat("Column", iname, "contains", worst_col * 100, "percent NAs.\n")
      # drop = FALSE keeps `m` two-dimensional when a single column remains.
      m <- m[, -idx, drop = FALSE]
      col_ids <- col_ids[-idx]
      cols_removed <- cols_removed + 1L
    }
  }

  cat("Removed", rows_removed, "rows and", cols_removed, "columns.\n")
  m
}
--
View this message in context:
http://www.nabble.com/NA-values-trimming-tp24339399p24352605.html
Sent from the R help mailing list archive at Nabble.com.
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.