Re: [R] R benchmarking program

Martin Maechler Wed, 14 May 2008 04:12:18 -0700

>>>>> "MM" == Martin Maechler <[EMAIL PROTECTED]>
>>>>>     on Wed, 14 May 2008 12:05:00 +0200 writes:


>>>>> "PhGr" == Philippe Grosjean <[EMAIL PROTECTED]>
>>>>>     on Tue, 13 May 2008 16:10:15 +0200 writes:

    PhGr> Hello,
    PhGr> I did this bechmark test. Perhaps is it a good oppotunity to rewrite 
it 
    PhGr> and make it compatible with R 2.7.0, David?

    MM> I'll not really rewrite it (to make it "nice" in my eyes),
    MM> but I'm fixing the problems with the fact that the 'Matrix'
    MM> package you were using back then has been much changed in the
    MM> mean time.

    MM> Expect a corrected benchmark R script within a day or so.

Here, it is {attachament in 'text/plain' which is allowed for R-help}

Note that I *do* agree with Prof. Brian Ripley about the
usefulness of benchmarks.

Martin Maechler, ETH Zurich

#### R Benchmark 2.4 (May 2008)
benchVersion <- structure(2.4,
                          date = as.Date("2008-05-14"))
#### adapted to run in R 2.7.0 with Matrix 0.999735-9, Martin Maechler ETH 
Zurich
##
#### R Benchmark 2.3 (21 April 2004)
#### Warning: changes are not carefully checked yet!
#### version 2.3 adapted to R 1.9.0
#### Many thanks to Douglas Bates ([EMAIL PROTECTED]) for improvements!
#### version 2.2 adapted to R 1.8.0
#### version 2.1 adapted to R 1.7.0
#### version 2, scaled to get 1 +/- 0.1 sec with R 1.6.2
#### using the standard ATLAS library (Rblas.dll)
#### on a Pentium IV 1.6 Ghz with 1 Gb Ram on Win XP pro

#### revised and optimized for R v. 1.5.x, 8 June 2002
#### Requires additionnal libraries: Matrix, SuppDists
#### Author : Philippe Grosjean
#### eMail  : [EMAIL PROTECTED]
#### Web    : http://www.sciviews.org
#### License: GPL 2 or above at your convenience (see: http://www.gnu.org)

#### Several tests are adapted from the Splus Benchmark Test V. 2
#### by Stephan Steinhaus ([EMAIL PROTECTED])
#### Reference for Escoufier's equivalents vectors (test III.5):
#### Escoufier Y., 1970. Echantillonnage dans une population de variables
#### aleatoires réelles. Publ. Inst. Statis. Univ. Paris 19 Fasc 4, 1-47.

#### source("<this file>") to start the test
#### ---------------------
#### TODO: Rewrite all this to work nicely with 'R CMD BATCH'

runs <- 3                       # Number of times the tests are executed
times <- rep(0, 15); dim(times) <- c(5,3)
require(Matrix)         # Optimized matrix operations
require(SuppDists)      # Optimized random number generators
Runif <- rMWC1019       # The fast uniform number generator
## If you don't have SuppDists, you can use: Runif <- runif
a <- rMWC1019(10, new.start=TRUE, seed=492166)  # Init. the generator
Rnorm <- rziggurat      # The fast normal number generator
## If you don't have SuppDists, you can use: Rnorm <- rnorm
b <- rziggurat(10, new.start=TRUE)      # Init. the generator
remove("a", "b")
options(object.size=100000000)
maybeFlush <- function()
    if(R.Version()$os %in% c("Win32", "mingw32")) flush.console()

MatrixRnorm <- function(n, m=n) Matrix(Rnorm(n * m), nrow = n, ncol = m)

cat("\n\n   R Benchmark", benchVersion,
      "\n   ===============\n",
    "\nNumber of times each test is run__________________________: ", runs,
    "\n\n")


cat("   I. Matrix calculation\n")
cat("   ---------------------\n")
maybeFlush()

## (1)
cumulate <- 0; a <- 0; b <- 0
for (i in 1:runs) {
  timing <- system.time({
    a <- matrix(Rnorm(1500*1500)/10, ncol=1500, nrow=1500)
    b <- t(a)
    dim(b) <- c(750, 3000)
    a <- t(b)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[1, 1] <- timing
cat("Creation, transp., deformation of a 1500x1500 matrix (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (2)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- abs(matrix(Rnorm(800*800)/2, ncol=800, nrow=800))
  timing <- system.time({
    b <- a^1000
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[2, 1] <- timing
cat("800x800 normal distributed random matrix ^1000______ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (3)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- Rnorm(2000000)
  timing <- system.time({
    b <- sort(a, method="quick")        # Sort is modified in v. 1.5.x
    ## And there is now a quick method that better competes with other 
packages!!!
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[3, 1] <- timing
cat("Sorting of 2,000,000 random values__________________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (4)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- Rnorm(700*700); dim(a) <- c(700, 700)
  timing <- system.time({
    b <- crossprod(a)           # equivalent to: b <- t(a) %*% a
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[4, 1] <- timing
cat("700x700 cross-product matrix (b = a' * a)___________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (5)
cumulate <- 0; c <- 0; qra <-0
for (i in 1:runs) {
  a <- MatrixRnorm(600)
  b <- as.double(1:600)
  timing <- system.time({
    c <- solve(crossprod(a), crossprod(a,b))
  })[3]
  cumulate <- cumulate + timing

  ## This is the old method
  ##a <- Rnorm(600*600); dim(a) <- c(600,600)
  ##b <- 1:600
  ##timing <- system.time({
  ##  qra <- qr(a, tol = 1e-7)
  ##  c <- qr.coef(qra, b)
  ##  #Rem: a little faster than c <- lsfit(a, b, inter=F)$coefficients
  ##})[3]
  ##cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[5, 1] <- timing
cat("Linear regression over a 600x600 matrix (c = a \\ b') (sec): ", timing, 
"\n")
remove("a", "b", "c", "qra")
maybeFlush()

times[ , 1] <- sort(times[ , 1])
cat("                      --------------------------------------------\n")
cat("                 Trimmed geom. mean (2 extremes eliminated): ",
    exp(mean(log(times[2:4, 1]))), "\n\n")

cat("   II. Matrix functions\n")
cat("   --------------------\n")
maybeFlush()


## (1)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- Rnorm(800000)
  timing <- system.time({
    b <- fft(a)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[1, 2] <- timing
cat("FFT over 800,000 random values______________________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (2) a) traditional matrix
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- array(Rnorm(320*320), dim = c(320, 320))
  timing <- system.time({
        b <- eigen(a, symmetric=FALSE, only.values=TRUE)$Value
        ## Rem: on my machine, it is faster than:
  ##     b <- La.eigen(a, symmetric=F, only.values=T, method="dsyevr")$Value
  ##     b <- La.eigen(a, symmetric=F, only.values=T, method="dsyev")$Value
  ##  b <- eigen.Matrix(a, vectors = F)$Value
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[2, 2] <- timing
cat("Eigenvalues of a 320x320 random matrix______________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()
##
## (2) b) "Matrix"
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- MatrixRnorm(320)
  timing <- system.time({
      b <- eigen(a, symmetric=FALSE, only.values=TRUE)$Value
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[2, 2] <- timing
cat("Eigenvalues of a 320x320 random Matrix______________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()


## (3) a) traditional matrix
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- Rnorm(650*650); dim(a) <- c(650, 650)
  timing <- system.time({
    ##b <- determinant(a, logarithm=F)
    ## Rem: the following is slower on my computer!
    ## b <- det.default(a)
    b <- determinant(a)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[3, 2] <- timing
cat("Determinant of a 650x650 random matrix______________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()
##
## (3)  b) --- using "Matrix"
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- MatrixRnorm(650)
  timing <- system.time({
      b <- determinant(a)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[3, 2] <- timing
cat("Determinant of a 650x650 random Matrix______________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()


## (4)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- crossprod(MatrixRnorm(900))
  ##a <- Rnorm(900*900); dim(a) <- c(900, 900)
  ##a <- crossprod(a, a)
  timing <- system.time({
    b <- chol(a)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[4, 2] <- timing
cat("Cholesky decomposition of a 900x900 matrix__________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (5)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  a <- MatrixRnorm(400)
  ##a <- Rnorm(400*400); dim(a) <- c(400, 400)
  timing <- system.time({
  ##  b <- qr.solve(a)
    ## Rem: a little faster than
    b <- solve(a)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[5, 2] <- timing
cat("Inverse of a 400x400 random matrix__________________ (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

times[ , 2] <- sort(times[ , 2])
cat("                      --------------------------------------------\n")
cat("                Trimmed geom. mean (2 extremes eliminated): ",
    exp(mean(log(times[2:4, 2]))), "\n\n")

cat("   III. Programmation\n")
cat("   ------------------\n")
maybeFlush()


## (1)
cumulate <- 0; a <- 0; b <- 0; phi <- 1.6180339887498949
for (i in 1:runs) {
  a <- floor(Runif(750000)*1000)
  timing <- system.time({
    b <- (phi^a - (-phi)^(-a))/sqrt(5)
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[1, 3] <- timing
cat("750,000 Fibonacci numbers calculation (vector calc)_ (sec): ", timing, 
"\n")
remove("a", "b", "phi")
maybeFlush()

## (2)
cumulate <- 0; a <- 2250; b <- 0
for (i in 1:runs) {
  timing <- system.time({
    b <- rep(1:a, a); dim(b) <- c(a, a)
    b <- 1 / (t(b) + 0:(a-1))
    ## Rem: this is twice as fast as the following code proposed by R 
programmers
    ## a <- 1:a; b <- 1 / outer(a - 1, a, "+")
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[2, 3] <- timing
cat("Creation of a 2250x2250 Hilbert matrix (matrix calc) (sec): ", timing, 
"\n")
remove("a", "b")
maybeFlush()

## (3)
cumulate <- 0; c <- 0
gcd2 <- function(x, y) {
    if (sum(y > 1e-4) == 0) x else {
        y[y == 0] <- x[y == 0]
        Recall(y, x %% y)
    }
}
for (i in 1:runs) {
  a <- ceiling(Runif(70000)*1000)
  b <- ceiling(Runif(70000)*1000)
  timing <- system.time({
    c <- gcd2(a, b)                            # gcd2 is a recursive function
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[3, 3] <- timing
cat("Grand common divisors of 70,000 pairs (recursion)___ (sec): ", timing, 
"\n")
remove("a", "b", "c", "gcd2")
maybeFlush()

## (4)
cumulate <- 0; b <- 0
for (i in 1:runs) {
  b <- rep(0, 220*220); dim(b) <- c(220, 220)
  timing <- system.time({
        ## Rem: there are faster ways to do this
        ## but here we want to time loops (220*220 'for' loops)!
    for (j in 1:220) {
      for (k in 1:220) {
        b[k,j] <- abs(j - k) + 1
      }
    }
  })[3]
  cumulate <- cumulate + timing
}
timing <- cumulate/runs
times[4, 3] <- timing
cat("Creation of a 220x220 Toeplitz matrix (loops)_______ (sec): ", timing, 
"\n")
remove("b", "j", "k")
maybeFlush()

## (5)
cumulate <- 0; p <- 0; vt <- 0; vr <- 0; vrt <- 0; rvt <- 0; RV <- 0; j <- 0; k 
<- 0
x2 <- 0; R <- 0; Rxx <- 0; Ryy <- 0; Rxy <- 0; Ryx <- 0; Rvmax <- 0
## Calculate the trace of a matrix (sum of its diagonal elements)
Trace <- function(y) sum(c(y)[1 + 0:(min(dim(y)) - 1) * (dim(y)[1] + 1)],
                          na.rm=FALSE)
for (i in 1:runs) {
  x <- abs(Rnorm(37*37)); dim(x) <- c(37, 37)
  timing <- system.time({
    ## Calculation of Escoufier's equivalent vectors
    p <- ncol(x)
    vt <- 1:p                                  # Variables to test
    vr <- NULL                                 # Result: ordered variables
    RV <- 1:p                                  # Result: correlations
    vrt <- NULL
    for (j in 1:p) {                           # loop on the variable number
      Rvmax <- 0
      for (k in 1:(p-j+1)) {                   # loop on the variables
        x2 <- cbind(x, x[,vr], x[,vt[k]])
        R <- cor(x2)                           # Correlations table
        Ryy <- R[1:p, 1:p]
        Rxx <- R[(p+1):(p+j), (p+1):(p+j)]
        Rxy <- R[(p+1):(p+j), 1:p]
        Ryx <- t(Rxy)
        rvt <- Trace(Ryx %*% Rxy) /
            sqrt(Trace(Ryy %*% Ryy) * Trace(Rxx %*% Rxx)) # RV calculation
        if (rvt > Rvmax) {
          Rvmax <- rvt                         # test of RV
          vrt <- vt[k]                         # temporary held variable
        }
      }
      vr[j] <- vrt                             # Result: variable
      RV[j] <- Rvmax                           # Result: correlation
      vt <- vt[vt!=vr[j]]                      # reidentify variables to test
    }
  })[3]
  cumulate <- cumulate + timing
}
times[5, 3] <- timing
cat("Escoufier's method on a 37x37 matrix (mixed)________ (sec): ", timing, 
"\n")
remove("x", "p", "vt", "vr", "vrt", "rvt", "RV", "j", "k",
       "x2", "R", "Rxx", "Ryy", "Rxy", "Ryx", "Rvmax", "Trace")
maybeFlush()


times[ , 3] <- sort(times[ , 3])
cat("                      --------------------------------------------\n")
cat("                Trimmed geom. mean (2 extremes eliminated): ",
    exp(mean(log(times[2:4, 3]))), "\n\n\n")

cat("Total time for all 15 tests_________________________ (sec): ", 
sum(times),"\n")
cat("Overall mean (sum of I, II and III trimmed means/3)_ (sec): ",
    exp(mean(log(times[2:4, ]))), "\n")
remove("cumulate", "timing", "times", "runs", "i")
cat("                      --- End of test ---\n\n")

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] R benchmarking program

Reply via email to