Hi AK,
Thanks very much.
I sent you another email with a larger Sample.zip file. The Quantilecode.R
script, which you initially developed for the smaller Sample.zip, did not
complete the task when I ran it on the larger data set. Could you please
take a look and fix whatever causes the error message?
Thanks,
Atem.
-- Original Message --
From : arun
To : R. Help;
Cc : Zilefac Elvis;
Sent : 14-04-2014 18:57
Subject : Re: Quantile and rowMean from multiple files in a folder
Hi Atem,
I guess this is what you wanted.
###Q1:
###
###working directory: Observed
#Only one file per Site. Assuming this is the case for the full dataset, then
#I guess there is no need to average
# Create the output folder for the per-site quantile tables.
dir.create("final")

# Group the CSV files of the working directory by site ID, i.e. the part of
# the file name before the first underscore. The pattern is anchored so that
# only names that really end in ".csv" are picked up (an unanchored ".csv"
# matches any character followed by "csv" anywhere in the name).
csv_files <- list.files(pattern = "\\.csv$")
lst1 <- split(csv_files, gsub("\\_.*", "", csv_files))

# Read every file of every site into a nested list (site -> file -> table).
# The first two lines of a file together form the header: both are split on
# commas and pasted element-wise (separated by a space) to build the column
# names. The data start on line 3, and the last two rows of each table are
# dropped.
lst2 <- lapply(lst1, function(site_files) {
  lapply(site_files, function(path) {
    lines1 <- readLines(path)
    header1 <- lines1[1:2]
    dat1 <- read.table(
      text = lines1, header = FALSE, sep = ",",
      stringsAsFactors = FALSE, skip = 2
    )
    colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
    # head(, -2) drops the two trailing rows and, unlike negative indexing
    # with nrow(), stays well defined when the table has no rows at all.
    head(dat1, -2)
  })
})
# Row counts per site; the first column is removed from each table before
# the rows are counted.
sapply(seq_along(lst2), function(i) {
  value_tables <- lapply(lst2[[i]], function(tbl) tbl[, -1])
  sapply(value_tables, nrow)
})
#[1] 9 9 9 8 2 9

# Column counts per site (all columns, including the first one).
sapply(seq_along(lst2), function(i) {
  sapply(lst2[[i]], ncol)
})
#[1] 157 258 258 98 157 258
library(plyr)
library(stringr)
# For every site, tag each column except the first with "_<site ID>", trim
# surrounding whitespace from all column names, and keep the first table of
# the site. The result is a list of tables named by site ID.
# (The statement was hard-wrapped in the middle of `str_trim`; it is
# reassembled here so that it parses.)
lst3 <- setNames(
  lapply(seq_along(lst2), function(i) {
    site_id <- names(lst1)[i]
    tagged <- lapply(lst2[[i]], function(dat) {
      names(dat)[-1] <- paste(names(dat)[-1], site_id, sep = "_")
      names(dat) <- str_trim(names(dat))
      dat
    })
    tagged[[1]]
  }),
  names(lst1)
)

# Merge all site tables into one wide table keyed on "Year".
# NOTE(review): join_all() is called with its default join type - confirm
# that years missing from the first site's table may be dropped.
df1 <- join_all(lst3, by = "Year")
dim(df1)
#[1]    9 1181
# Sanity check: build the percentile table of every site and report its
# number of columns. Columns of df1 are grouped by the site ID that follows
# the last underscore of their name.
# (The statement was hard-wrapped in the middle of an assignment and of
# `numcolwise`; it is reassembled here so that it parses.)
sapply(
  split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1])),
  function(x) {
    # drop = FALSE keeps a data frame even when a site has a single column.
    df2 <- df1[, x, drop = FALSE]
    df3 <- data.frame(
      Percentiles = paste0(seq(0, 100, by = 1), "%"),
      numcolwise(
        function(y) quantile(y, seq(0, 1, by = 0.01), na.rm = TRUE)
      )(df2),
      stringsAsFactors = FALSE
    )
    ncol(df3)
  }
)
#G100 G101 G102 G103 G104 G105
# 157 258 258 98 157 258
# Column names of df1 (without the first column) grouped by site ID, i.e.
# the text after the last underscore of each name.
lst4 <- split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1]))

# For every site, compute the 0%..100% percentiles (1% steps) of each numeric
# column and write them to final/<site>_Quantile.csv.
# Changes from the posted version: the hard-wrapped tokens are reassembled,
# the discarded `df3[1:3,1:3]` expression is removed, the file is labelled
# with names(lst4) (the list actually being iterated) rather than
# names(lst1), and drop = FALSE keeps single-column sites as data frames.
invisible(lapply(seq_along(lst4), function(i) {
  df2 <- df1[, lst4[[i]], drop = FALSE]
  df3 <- data.frame(
    Percentiles = paste0(seq(0, 100, by = 1), "%"),
    numcolwise(
      function(y) quantile(y, seq(0, 1, by = 0.01), na.rm = TRUE)
    )(df2),
    stringsAsFactors = FALSE
  )
  out_file <- file.path(
    getwd(), "final",
    paste0(paste(names(lst4)[[i]], "Quantile", sep = "_"), ".csv")
  )
  write.csv(df3, out_file, row.names = FALSE, quote = FALSE)
}))
# Read the quantile tables back in. Only the "final" folder is listed:
# grepping "Quantile" over a recursive listing of the working directory
# would also pick up unrelated files whose path contains that word (for
# example a script called Quantilecode.R).
quantile_files <- list.files(
  "final", pattern = "_Quantile\\.csv$", full.names = TRUE
)
ReadOut1 <- lapply(
  quantile_files,
  function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE)
)

# Dimensions of every table: rows in the first line, columns in the second.
sapply(ReadOut1, dim)
# [,1] [,2] [,3] [,4] [,5] [,6]
#[1,] 101 101 101 101 101 101
#[2,] 157 258 258 98 157 258

# Top-left corner (2 rows x 3 columns) of the first three tables.
lapply(ReadOut1, function(x) x[1:2, 1:3])[1:3]
#[[1]]
# Percentiles pav.DJF_G100 pav.MAM_G100
#1 0% 0 0.640500
#2 1% 0 0.664604
#
#[[2]]
# Percentiles txav.DJF_G101 txav.MAM_G101
#1 0% -13.8756 4.742400
#2 1% -13.8140 4.817184
#
#[[3]]
# Percentiles txav.DJF_G102 txav.MAM_G102
#1 0% -15.05000 4.520700
#2 1% -14.96833 4.543828
#
###Q2:
###Observed data
# Create the output folder for the per-index mean tables.
dir.create("Indices")

# Index names: take the column names of the quantile tables read back from
# disk (first column excluded), strip everything from the first underscore
# on (the "_<site>" suffix) and replace dots by spaces - presumably undoing
# the name mangling applied by read.csv(); verify against the input headers.
names1 <- unlist(lapply(ReadOut1, function(x) names(x)[-1]))
names2 <- gsub("\\_.*", "", names1)
names3 <- unique(gsub("[.]", " ", names2))

# One row per site, one column per index: the column means of df1 for that
# site, with NA where the site has no column for an index.
# Changes from the posted version: the hard-wrapped tokens (including a split
# `<-`) are reassembled, values are aligned to the index names by name with
# match() instead of by position, and drop = FALSE keeps single-column sites
# as data frames so that colMeans() still works.
res <- do.call(rbind, lapply(seq_along(lst4), function(i) {
  vec1 <- colMeans(df1[, lst4[[i]], drop = FALSE], na.rm = TRUE)
  wanted <- paste(names3, names(lst4)[[i]], sep = "_")
  setNames(unname(vec1[match(wanted, names(vec1))]), names3)
}))

# Write one single-row CSV per index (columns = sites) to
# Indices/<index name with spaces replaced by underscores>.csv.
invisible(lapply(seq_len(ncol(res)), function(i) {
  mat1 <- t(res[, i, drop = FALSE])
  colnames(mat1) <- names(lst4)
  out_file <- file.path(
    getwd(), "Indices",
    paste0(gsub(" ", "_", rownames(mat1)), ".csv")
  )
  write.csv(mat1, out_file, row.names = FALSE, quote = FALSE)
}))
##Output2:
# Read the per-index tables back in. Only the "Indices" folder is listed:
# grepping "Indices" over a recursive listing of the working directory would
# also match any other path that contains that word (for example a folder
# named Indices_colMeans).
index_files <- list.files("Indices", pattern = "\\.csv$", full.names = TRUE)
ReadOut2 <- lapply(
  index_files,
  function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE)
)
length(ReadOut2)
#[1] 257

# Compare the first file on disk with the matching column of `res`.
index_files[1]
#[1] "Indices/pav_ANN.csv"
res[, "pav ANN", drop = FALSE]
# pav ANN
#[1,] 1.298811
#[2,] 7.642922
#[3,] 6.740011
#[4,] NA
#[5,] 1.296650
#[6,] 6.887622
ReadOut2[[1]]
# G100 G101 G102 G103 G104 G105
#1 1.298811 7.642922 6.740011 NA 1.29665 6.887622
###Sample data
###Working directory changed to "sample"
# Create the output folder for the sample-data column means.
dir.create("Indices_colMeans")

# Group the CSV files of the working directory by site ID, i.e. the part of
# the file name before the first underscore. The pattern is anchored so that
# only names that really end in ".csv" are picked up (an unanchored ".csv"
# matches any character followed by "csv" anywhere in the name).
csv_files <- list.files(pattern = "\\.csv$")
lst1 <- split(csv_files, gsub("\\_.*", "", csv_files))

# Read every file of every site into a nested list (site -> file -> table).
# The first two lines of a file together form the header: both are split on
# commas and pasted element-wise (separated by a space) to build the column
# names. The data start on line 3, and the last two rows of each table are
# dropped.
lst2 <- lapply(lst1, function(site_files) {
  lapply(site_files, function(path) {
    lines1 <- readLines(path)
    header1 <- lines1[1:2]
    dat1 <- read.table(
      text = lines1, header = FALSE, sep = ",",
      stringsAsFactors = FALSE, skip = 2
    )
    colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
    # head(, -2) drops the two trailing rows and, unlike negative indexing
    # with nrow(), stays well defined when the table has no rows at all.
    head(dat1, -2)
  })
})
res1