Dear R users,

A colleague of mine asked me how to write a script (an executable text file
containing R code) in R. After I showed
him, he said that after extensive searching of the R archives, he had not
found anything like these techniques.

He suggested that I share these methods to enable others to leverage R as a
better alternative to bash/perl scripts.

So in the interest of giving back to the R community, and with all humility,
I offer the
following small demonstration of one method for creating scripts of R code
that are
executable from the (at least Linux) command line.

I don't make any warrantees that this will work for you, but if it helps
somebody at least
get starting utilizing R effectively in scripts, then great!

Best regards,

Jason

-- 
Jason E. Aten, Ph.D.


# file: scriptdemo.rsh

#!/bin/bash
exec R --vanilla -q --slave -e "source(file=pipe(\"tail -n +4 $0\"))" --args
$@
#debug: exec R --vanilla --verbose -e "source(file=pipe(\"tail -n +4 $0\"))"
--args $@
### The above line starts R and then reads in this script, starting at line
4:
#
# scriptdemo.rsh : a simple filter script to demonstrate how to write a
script in R that
#                reads stdin and utilizes command line argv. Also shows how
to use ppp() to do
#                bash scripting like variable substitution, which is really
just syntactic
#                sugar. But sugar can be sweet.
#
# NB: Only tested on Linux, YMMV, and you may have to adapt to your OS. If
it breaks, you
#     get to keep both pieces.

# 1st point of note: notice the exec R invocation above, with the pipe and
tail combo.
#   This file becomes the program read into R. If it is set chmod+x then you
can execute this file.

  pp=function(...) paste(sep="",...)
  script="scriptdemo.rsh"
  usage=pp(script,": put help info here")

  argv = commandArgs(trailingOnly=TRUE)


  # --help
  if(any(argv=="--help")) {
    cat(usage)
    quit(save="no",status=0)
  }

# 2nd point of note: this is how to read stdin inside a script:
#

  # slurp in all the input
  r=readLines("stdin")

  bad=grep("^#",r) # remove comments

  # write out lines that didn't start with #
  cat(r[setdiff(1:length(r),bad)],sep="\n")


# 3rd point of note: if you want nice bash shell scripting string
substitution and backticking
#  you can use my ppp() function. Note it's not well vectorized at the
moment, so it will expect
#  variables that are substituted from the environment to be of length 1.
#  A bit hackish in places (sure the |@|@| and 34HERE43 stuff makes me
wince), but it gets the job done,
#  as it's meant as a proof of concept.

##########################
# utility functions leading up to final definition of ppp() : shell
scripting like facilities for R
#  Skip to the end of this file to see what ppp() does for you.
##########################

# delete one trailing whitespace
chomp=function(x) {
   n=nchar(x)
   a=substr(x,n,n)
   w=which(a==" " | a == "\n" | a=="\t")
   if (length(w)) {
     x[w]=substr(x[w],1,n[w]-1)
   }
   x
}

# delete one leading whitespace
prechomp=function(x) {
   n=nchar(x)
   a=substr(x,1,1)
   w=which(a==" " | a == "\n" | a == "\t")
   if (length(w)) {
     x[w]=substr(x[w],2,n[w])
   }
   x
}


# eliminate whitespace leading/trailing from a string
trim=function(x) {
   y=chomp(x)
   while(any(y!=x)) {
     x=y
     y=chomp(x)
   }

   y=prechomp(x)
   while(any(y!=x)) {
     x=y
     y=prechomp(x)
   }

  x
}

strsplit2=function(x,split,...) {
    # detect trailing split : and add "" afterwards, so we know if it was
there.
    a=strsplit(pp(x,"|@|@|"),split,...)
    lapply(a,function(x) gsub("|@|@|","",x,fixed=TRUE))
}

strsplit3=function(x,split,keepsplit=FALSE,...) {
    if (keepsplit) {
      repstring="34HERE43"
      if (length(grep(repstring,x))) { die(repstring, " repstring already
found. Arg! Aborting")  } # sanity check
      # note where we want to split, using \\1 backref to keep the original
      a=gsub(pattern=pp("(",split,")") ,replacement=pp(repstring,"\\1"),x)
    } else {
       a=x
       repstring=split
    }
    b=strsplit2(a,repstring,...) # split, keeping the original delimiters
}

pp=function(...) paste(...,sep="") # pp() must be defined in outermost scope
for ppp() to work

replacer=function(s,begin.string="${",end.string="}",keepend=FALSE,require.end=TRUE)
{

     translate.env=function(x) {
       if (exists(x)) return(x)
       a=Sys.getenv(x)
       if (a!="") return(pp("\"",a,"\""))
       x
     }

     parts=strsplit2(s,begin.string,fixed=T)[[1]]
     if (length(parts) < 2 || all(parts=="")) return(s)
     if (any(trim(parts[-1])=="")) {
        warning(pp("ppp::replacer(): found begin.string '",begin.string,"'
in '",s,"' but had empty/blankspace/end of string following it."))
        return(s)
     }

     collap=c()
     collap[1]=parts[1]
     for (i in 2:length(parts)) {
       tmp=strsplit3(parts[i],end.string,keepsplit=keepend)[[1]]
       if (length(tmp)==1) {
         if (require.end) {
           warning(pp("ppp::replacer(): could not find end.string
'",end.string,"' in string '",s,"' and require.end=TRUE, so karping."))
           collap[(i-1)*2]=parts[i]
           collap[(i-1)*2+1]=""
         } else {
           collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"") # allow
newline to terminate as well, if end not required
           collap[(i-1)*2+1]=""
         }
       } else {
         collap[(i-1)*2]=pp("\",",translate.env(tmp[1]),",\"")
         # collect the rest of parts[i] following tmp[1] and the end.string
(assumes end.string is only ever length 1)

collap[(i-1)*2+1]=substr(parts[i],nchar(tmp[1])+1+(1-as.numeric(keepend)),nchar(parts[i]))
       }
     }
     text=pp("pp(\"",pp(collap,collapse=""),"\")")

     # sys.frame(-2) is necessary to get definitions from calling function
before where we were defined.
     if (sys.nframe() > 1) {
       ftext=eval(parse(text=text),envir=sys.frame(-2))
     } else {
       ftext=eval(parse(text=text))
     }
     ftext
}

pp=function(...) paste(...,sep="") # must be defined in outermost scope for
ppp() to work

# shell like string interpolation... ppp("fill in ${myvar} here after
`hostname` is $myvar")
ppp=function(...) {

  sa=paste(sep="",...)
  res=c()
  for (j in 1:length(sa)) {
     s=sa[j]
     s2=replacer(s,"${","}")
     terminators="\t| |\\.|`|\\$|\\{|\\}|\\(|\\)|<|>|\\|"
     s3=replacer(s2,"$",terminators,keepend=TRUE,require.end=FALSE) #
!require.end allows end of line termination
     res[j]=s3
  }

  do.sys.expecting.output=function(cmd) {
    got=pp(system(intern=T,cmd),collapse="\n")
    if (got=="") die("do.sys() on '",cmd,"' returned no output.")
    got
  }

  # now check for backtick system call requests as well, *after* variable
substitution is all finished.
  bt=grep("`",res)
  if(length(bt)) {
     sa=res[bt]
     for (j in 1:length(sa)) {
         s=sa[j]
         parts=strsplit2(s,"`",fixed=T)[[1]]
         if (length(parts) < 3) { res[bt[j]]=s; next; }

         collap=c()
         collap[1]=parts[1]
         for (i in seq(2,length(parts)-1,2)) {
            cmd=parts[i]
            collap[i]=do.sys.expecting.output(cmd)
            collap[i+1]=parts[i+1]
         }
         text=pp(collap,collapse="")
         res[bt[j]]=text
       } # for j
     } #end if length(bt)
  res
}



#
# now demonstrate the use of ppp() in a scripting context:
#

today="date"
month=3
year=2010

show=list()
show$syntax = 43

Sys.setenv(AN_ENV_VAR="greetings (I'm an env var!)")

demo=ppp("Getting $AN_ENV_VAR from the environment, on `$today`,
substituting ${show$syntax} in named lists is also possible. `cal $month
$year| head` ")

cat("here's the demo output\n")
cat(demo,sep="\n")


## # output of this demo script when run, show how to use stdin and ppp()
##
##
## m...@host:~/uns/bin$ cat ~/tmp/test | template2.rsh
## not comment 1
## not comment 2
## not comment 3
## not comment 4
## not comment 5
## here's the demo output
## Getting greetings (I'm an env var!) from the environment, on Mon Mar 29
10:23:49 CDT 2010, substituting 43 in named lists is also possible.
 March 2010
## Su Mo Tu We Th Fr Sa
##     1  2  3  4  5  6
##  7  8  9 10 11 12 13
## 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27
## 28 29 30 31
##
## m...@host:~/uns/bin$ cat ~/tmp/test
## # comment 1
## not comment 1
## not comment 2
## # comment 2
## not comment 3
## not comment 4
## not comment 5
## # comment 3

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to