Dear R users,
A colleague of mine asked me how to write a script (an executable text file
containing R code) in R. After I showed
him, he said that after extensive searching of the R archives, he had not
found anything like these techniques.
He suggested that I share these methods to enable others to leverage R as a
better alternative to bash/perl scripts.
So in the interest of giving back to the R community, and with all humility,
I offer the
following small demonstration of one method for creating scripts of R code
that are
executable from the (at least Linux) command line.
I don't make any warrantees that this will work for you, but if it helps
somebody at least
get starting utilizing R effectively in scripts, then great!
Best regards,
Jason
--
Jason E. Aten, Ph.D.
# file: scriptdemo.rsh
#!/bin/bash
exec R --vanilla -q --slave -e "source(file=pipe(\"tail -n +4 $0\"))" --args
$@
#debug: exec R --vanilla --verbose -e "source(file=pipe(\"tail -n +4 $0\"))"
--args $@
### The above line starts R and then reads in this script, starting at line
4:
#
# scriptdemo.rsh : a simple filter script to demonstrate how to write a
script in R that
# reads stdin and utilizes command line argv. Also shows how
to use ppp() to do
# bash scripting like variable substitution, which is really
just syntactic
# sugar. But sugar can be sweet.
#
# NB: Only tested on Linux, YMMV, and you may have to adapt to your OS. If
it breaks, you
# get to keep both pieces.
# 1st point of note: notice the exec R invocation above, with the pipe and
tail combo.
# This file becomes the program read into R. If it is set chmod+x then you
can execute this file.
pp=function(...) paste(sep="",...)
script="scriptdemo.rsh"
usage=pp(script,": put help info here")
argv = commandArgs(trailingOnly=TRUE)
# --help
if(any(argv=="--help")) {
cat(usage)
quit(save="no",status=0)
}
# 2nd point of note: this is how to read stdin inside a script:
#
# slurp in all the input
r=readLines("stdin")
bad=grep("^#",r) # remove comments
# write out lines that didn't start with #
cat(r[setdiff(1:length(r),bad)],sep="\n")
# 3rd point of note: if you want nice bash shell scripting string
substitution and backticking
# you can use my ppp() function. Note it's not well vectorized at the
moment, so it will expect
# variables that are substituted from the environment to be of length 1.
# A bit hackish in places (sure the |@|@| and 34HERE43 stuff makes me
wince), but it gets the job done,
# as it's meant as a proof of concept.
##########################
# utility functions leading up to final definition of ppp() : shell
scripting like facilities for R
# Skip to the end of this file to see what ppp() does for you.
##########################
# delete one trailing whitespace
chomp=function(x) {
n=nchar(x)
a=substr(x,n,n)
w=which(a==" " | a == "\n" | a=="\t")
if (length(w)) {
x[w]=substr(x[w],1,n[w]-1)
}
x
}
# delete one leading whitespace
prechomp=function(x) {
n=nchar(x)
a=substr(x,1,1)
w=which(a==" " | a == "\n" | a == "\t")
if (length(w)) {
x[w]=substr(x[w],2,n[w])
}
x
}
# eliminate whitespace leading/trailing from a string
trim=function(x) {
y=chomp(x)
while(any(y!=x)) {
x=y
y=chomp(x)
}
y=prechomp(x)
while(any(y!=x)) {
x=y
y=prechomp(x)
}
x
}
strsplit2=function(x,split,...) {
# detect trailing split : and add "" afterwards, so we know if it was
there.
a=strsplit(pp(x,"|@|@|"),split,...)
lapply(a,function(x) gsub("|@|@|","",x,fixed=TRUE))
}
strsplit3=function(x,split,keepsplit=FALSE,...) {
if (keepsplit) {
repstring="34HERE43"
if (length(grep(repstring,x))) { die(repstring, " repstring already
found. Arg! Aborting") } # sanity check
# note where we want to split, using \\1 backref to keep the original
a=gsub(pattern=pp("(",split,")") ,replacement=pp(repstring,"\\1"),x)
} else {
a=x
repstring=split
}
b=strsplit2(a,repstring,...) # split, keeping the original delimiters
}
pp=function(...) paste(...,sep="") # pp() must be defined in outermost scope
for ppp() to work
replacer=function(s,begin.string="${",end.string="}",keepend=FALSE,require.end=TRUE)
{
translate.env=function(x) {
if (exists(x)) return(x)
a=Sys.getenv(x)
if (a!="") return(pp("\"",a,"\""))
x
}
parts=strsplit2(s,begin.string,fixed=T)[[1]]
if (length(parts) < 2 || all(parts=="")) return(s)
if (any(trim(parts[-1])=="")) {
warning(pp("ppp::replacer(): found begin.string '",begin.string,"'
in '",s,"' but had empty/blankspace/end of string following it."))
return(s)
}
collap=c()
collap[1]=parts[1]
for (i in 2:length(parts)) {
tmp=strsplit3(parts[i],end.string,keepsplit=keepend)[[1]]
if (length(tmp)==1) {
if (require.end) {
warning(pp("ppp::replacer(): could not find end.string
'",end.string,"' in string '",s,"' and require.end=TRUE, so karping."))
collap[(i-1)*2]=parts[i]
collap[(i-1)*2+1]=""
} else {
collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # allow
newline to terminate as well, if end not required
collap[(i-1)*2+1]=""
}
} else {
collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"")
# collect the rest of parts[i] following tmp[1] and the end.string
(assumes end.string is only ever length 1)
collap[(i-1)*2+1]=substr(parts[i],nchar(tmp[1])+1+(1-as.numeric(keepend)),nchar(parts[i]))
}
}
text=pp("pp(\"",pp(collap,collapse=""),"\")")
# sys.frame(-2) is necessary to get definitions from calling function
before where we were defined.
if (sys.nframe() > 1) {
ftext=eval(parse(text=text),envir=sys.frame(-2))
} else {
ftext=eval(parse(text=text))
}
ftext
}
pp=function(...) paste(...,sep="") # must be defined in outermost scope for
ppp() to work
# shell like string interpolation... ppp("fill in ${myvar} here after
`hostname` is $myvar")
ppp=function(...) {
sa=paste(sep="",...)
res=c()
for (j in 1:length(sa)) {
s=sa[j]
s2=replacer(s,"${","}")
terminators="\t| |\\.|`|\\$|\\{|\\}|\\(|\\)|<|>|\\|"
s3=replacer(s2,"$",terminators,keepend=TRUE,require.end=FALSE) #
!require.end allows end of line termination
res[j]=s3
}
do.sys.expecting.output=function(cmd) {
got=pp(system(intern=T,cmd),collapse="\n")
if (got=="") die("do.sys() on '",cmd,"' returned no output.")
got
}
# now check for backtick system call requests as well, *after* variable
substitution is all finished.
bt=grep("`",res)
if(length(bt)) {
sa=res[bt]
for (j in 1:length(sa)) {
s=sa[j]
parts=strsplit2(s,"`",fixed=T)[[1]]
if (length(parts) < 3) { res[bt[j]]=s; next; }
collap=c()
collap[1]=parts[1]
for (i in seq(2,length(parts)-1,2)) {
cmd=parts[i]
collap[i]=do.sys.expecting.output(cmd)
collap[i+1]=parts[i+1]
}
text=pp(collap,collapse="")
res[bt[j]]=text
} # for j
} #end if length(bt)
res
}
#
# now demonstrate the use of ppp() in a scripting context:
#
today="date"
month=3
year=2010
show=list()
show$syntax = 43
Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)")
demo=ppp("Getting $AN_ENV_VAR from the environment, on `$today`,
substituting ${show$syntax} in named lists is also possible. `cal $month
$year| head` ")
cat("here's the demo output\n")
cat(demo,sep="\n")
## # output of this demo script when run, show how to use stdin and ppp()
##
##
## m...@host:~/uns/bin$ cat ~/tmp/test | template2.rsh
## not comment 1
## not comment 2
## not comment 3
## not comment 4
## not comment 5
## here's the demo output
## Getting greetings (I'm an env var!) from the environment, on Mon Mar 29
10:23:49 CDT 2010, substituting 43 in named lists is also possible.
March 2010
## Su Mo Tu We Th Fr Sa
## 1 2 3 4 5 6
## 7 8 9 10 11 12 13
## 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27
## 28 29 30 31
##
## m...@host:~/uns/bin$ cat ~/tmp/test
## # comment 1
## not comment 1
## not comment 2
## # comment 2
## not comment 3
## not comment 4
## not comment 5
## # comment 3
[[alternative HTML version deleted]]
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.