Dear R users, A colleague of mine asked me how to write a script (an executable text file containing R code) in R. After I showed him, he said that after extensive searching of the R archives, he had not found anything like these techniques.
He suggested that I share these methods to enable others to leverage R as a better alternative to bash/perl scripts. So in the interest of giving back to the R community, and with all humility, I offer the following small demonstration of one method for creating scripts of R code that are executable from the (at least Linux) command line. I don't make any warrantees that this will work for you, but if it helps somebody at least get starting utilizing R effectively in scripts, then great! Best regards, Jason -- Jason E. Aten, Ph.D. # file: scriptdemo.rsh #!/bin/bash exec R --vanilla -q --slave -e "source(file=pipe(\"tail -n +4 $0\"))" --args $@ #debug: exec R --vanilla --verbose -e "source(file=pipe(\"tail -n +4 $0\"))" --args $@ ### The above line starts R and then reads in this script, starting at line 4: # # scriptdemo.rsh : a simple filter script to demonstrate how to write a script in R that # reads stdin and utilizes command line argv. Also shows how to use ppp() to do # bash scripting like variable substitution, which is really just syntactic # sugar. But sugar can be sweet. # # NB: Only tested on Linux, YMMV, and you may have to adapt to your OS. If it breaks, you # get to keep both pieces. # 1st point of note: notice the exec R invocation above, with the pipe and tail combo. # This file becomes the program read into R. If it is set chmod+x then you can execute this file. pp=function(...) paste(sep="",...) script="scriptdemo.rsh" usage=pp(script,": put help info here") argv = commandArgs(trailingOnly=TRUE) # --help if(any(argv=="--help")) { cat(usage) quit(save="no",status=0) } # 2nd point of note: this is how to read stdin inside a script: # # slurp in all the input r=readLines("stdin") bad=grep("^#",r) # remove comments # write out lines that didn't start with # cat(r[setdiff(1:length(r),bad)],sep="\n") # 3rd point of note: if you want nice bash shell scripting string substitution and backticking # you can use my ppp() function. Note it's not well vectorized at the moment, so it will expect # variables that are substituted from the environment to be of length 1. # A bit hackish in places (sure the |@|@| and 34HERE43 stuff makes me wince), but it gets the job done, # as it's meant as a proof of concept. ########################## # utility functions leading up to final definition of ppp() : shell scripting like facilities for R # Skip to the end of this file to see what ppp() does for you. ########################## # delete one trailing whitespace chomp=function(x) { n=nchar(x) a=substr(x,n,n) w=which(a==" " | a == "\n" | a=="\t") if (length(w)) { x[w]=substr(x[w],1,n[w]-1) } x } # delete one leading whitespace prechomp=function(x) { n=nchar(x) a=substr(x,1,1) w=which(a==" " | a == "\n" | a == "\t") if (length(w)) { x[w]=substr(x[w],2,n[w]) } x } # eliminate whitespace leading/trailing from a string trim=function(x) { y=chomp(x) while(any(y!=x)) { x=y y=chomp(x) } y=prechomp(x) while(any(y!=x)) { x=y y=prechomp(x) } x } strsplit2=function(x,split,...) { # detect trailing split : and add "" afterwards, so we know if it was there. a=strsplit(pp(x,"|@|@|"),split,...) lapply(a,function(x) gsub("|@|@|","",x,fixed=TRUE)) } strsplit3=function(x,split,keepsplit=FALSE,...) { if (keepsplit) { repstring="34HERE43" if (length(grep(repstring,x))) { die(repstring, " repstring already found. Arg! Aborting") } # sanity check # note where we want to split, using \\1 backref to keep the original a=gsub(pattern=pp("(",split,")") ,replacement=pp(repstring,"\\1"),x) } else { a=x repstring=split } b=strsplit2(a,repstring,...) # split, keeping the original delimiters } pp=function(...) paste(...,sep="") # pp() must be defined in outermost scope for ppp() to work replacer=function(s,begin.string="${",end.string="}",keepend=FALSE,require.end=TRUE) { translate.env=function(x) { if (exists(x)) return(x) a=Sys.getenv(x) if (a!="") return(pp("\"",a,"\"")) x } parts=strsplit2(s,begin.string,fixed=T)[[1]] if (length(parts) < 2 || all(parts=="")) return(s) if (any(trim(parts[-1])=="")) { warning(pp("ppp::replacer(): found begin.string '",begin.string,"' in '",s,"' but had empty/blankspace/end of string following it.")) return(s) } collap=c() collap[1]=parts[1] for (i in 2:length(parts)) { tmp=strsplit3(parts[i],end.string,keepsplit=keepend)[[1]] if (length(tmp)==1) { if (require.end) { warning(pp("ppp::replacer(): could not find end.string '",end.string,"' in string '",s,"' and require.end=TRUE, so karping.")) collap[(i-1)*2]=parts[i] collap[(i-1)*2+1]="" } else { collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # allow newline to terminate as well, if end not required collap[(i-1)*2+1]="" } } else { collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # collect the rest of parts[i] following tmp[1] and the end.string (assumes end.string is only ever length 1) collap[(i-1)*2+1]=substr(parts[i],nchar(tmp[1])+1+(1-as.numeric(keepend)),nchar(parts[i])) } } text=pp("pp(\"",pp(collap,collapse=""),"\")") # sys.frame(-2) is necessary to get definitions from calling function before where we were defined. if (sys.nframe() > 1) { ftext=eval(parse(text=text),envir=sys.frame(-2)) } else { ftext=eval(parse(text=text)) } ftext } pp=function(...) paste(...,sep="") # must be defined in outermost scope for ppp() to work # shell like string interpolation... ppp("fill in ${myvar} here after `hostname` is $myvar") ppp=function(...) { sa=paste(sep="",...) res=c() for (j in 1:length(sa)) { s=sa[j] s2=replacer(s,"${","}") terminators="\t| |\\.|`|\\$|\\{|\\}|\\(|\\)|<|>|\\|" s3=replacer(s2,"$",terminators,keepend=TRUE,require.end=FALSE) # !require.end allows end of line termination res[j]=s3 } do.sys.expecting.output=function(cmd) { got=pp(system(intern=T,cmd),collapse="\n") if (got=="") die("do.sys() on '",cmd,"' returned no output.") got } # now check for backtick system call requests as well, *after* variable substitution is all finished. bt=grep("`",res) if(length(bt)) { sa=res[bt] for (j in 1:length(sa)) { s=sa[j] parts=strsplit2(s,"`",fixed=T)[[1]] if (length(parts) < 3) { res[bt[j]]=s; next; } collap=c() collap[1]=parts[1] for (i in seq(2,length(parts)-1,2)) { cmd=parts[i] collap[i]=do.sys.expecting.output(cmd) collap[i+1]=parts[i+1] } text=pp(collap,collapse="") res[bt[j]]=text } # for j } #end if length(bt) res } # # now demonstrate the use of ppp() in a scripting context: # today="date" month=3 year=2010 show=list() show$syntax = 43 Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)") demo=ppp("Getting $AN_ENV_VAR from the environment, on `$today`, substituting ${show$syntax} in named lists is also possible. `cal $month $year| head` ") cat("here's the demo output\n") cat(demo,sep="\n") ## # output of this demo script when run, show how to use stdin and ppp() ## ## ## m...@host:~/uns/bin$ cat ~/tmp/test | template2.rsh ## not comment 1 ## not comment 2 ## not comment 3 ## not comment 4 ## not comment 5 ## here's the demo output ## Getting greetings (I'm an env var!) from the environment, on Mon Mar 29 10:23:49 CDT 2010, substituting 43 in named lists is also possible. March 2010 ## Su Mo Tu We Th Fr Sa ## 1 2 3 4 5 6 ## 7 8 9 10 11 12 13 ## 14 15 16 17 18 19 20 ## 21 22 23 24 25 26 27 ## 28 29 30 31 ## ## m...@host:~/uns/bin$ cat ~/tmp/test ## # comment 1 ## not comment 1 ## not comment 2 ## # comment 2 ## not comment 3 ## not comment 4 ## not comment 5 ## # comment 3 [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.