Hi,

I found the init.c file I did not see before.

> m <- expand.grid( x = 1:20, y = 1:20, z = 1:20 )
> d <- dist( m )
> system.time( out <- as.matrix( d ) )
  user  system elapsed
 3.006   1.252   4.429
> old.as.matrix.dist <- function(x, ...)
+ {
+     size <- attr(x, "Size")
+     df <- matrix(0, size, size)
+     df[row(df) > col(df)] <- x
+     df <- df + t(df)
+     labels <- attr(x, "Labels")
+     dimnames(df) <-
+ if(is.null(labels)) list(1L:size,1L:size) else list(labels,labels)
+     df
+ }
> system.time( out <- old.as.matrix.dist( d ) )
  user  system elapsed
17.471   2.964  21.304


Romain

Romain Francois wrote:
Hello,

I am trying to patch as.matrix.dist to achieve some speedup.

> m <- expand.grid( x = 1:20, y = 1:20, z = 1:20 )
> d <- dist( m )
> system.time( out <- stats:::as.matrix.dist( d ) )
  user  system elapsed
15.355   3.110  19.123
> system.time( out <- as.matrix.dist( d ) )
  user  system elapsed
 3.153   0.480   3.782


--
Romain Francois
Independent R Consultant
+33(0) 6 28 91 30 30
http://romainfrancois.blog.free.fr


Index: src/stats.h
===================================================================
--- src/stats.h	(revision 48386)
+++ src/stats.h	(working copy)
@@ -52,4 +52,5 @@
 void F77_NAME(lminfl)(double *x, int *ldx, int *n, int *k, int *docoef,
 		      double *qraux, double *resid, double *hat, 
 		      double *coef, double *sigma, double *tol);
+void as_matrix_dist(double*d, double*x, int*n) ;
 #endif
Index: src/init.c
===================================================================
--- src/init.c	(revision 48386)
+++ src/init.c	(working copy)
@@ -32,6 +32,7 @@
 #include <R_ext/Rdynload.h>
 #include <R_ext/Visibility.h>
 
+static R_NativePrimitiveArgType as_matrix_dist_t[3] = {REALSXP, REALSXP,INTSXP};
 static R_NativePrimitiveArgType chisqsim_t[11] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP,
 					   INTSXP, REALSXP, INTSXP, REALSXP, INTSXP, REALSXP};
 static R_NativePrimitiveArgType fishersim_t[10] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP,
@@ -91,6 +92,7 @@
 #define FDEF(name)  {#name, (DL_FUNC) &F77_SUB(name), sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t}
 
 static const R_CMethodDef CEntries[]  = {
+    {"as_matrix_dist", (DL_FUNC) &as_matrix_dist, 3, as_matrix_dist_t},
     {"chisqsim", (DL_FUNC) &chisqsim, 11, chisqsim_t},
     {"fisher_sim", (DL_FUNC) &fisher_sim, 10, fishersim_t},
     {"d2x2xk", (DL_FUNC) &d2x2xk, 5, d2_t},
Index: src/distance.c
===================================================================
--- src/distance.c	(revision 48386)
+++ src/distance.c	(working copy)
@@ -230,3 +230,25 @@
 	    d[ij++] = (*method != MINKOWSKI) ?
 		distfun(x, *nr, *nc, i, j) : R_minkowski(x, *nr, *nc, i, j, *p);
 }
+
+/**
+ * called by as.matrix.dist
+ *
+ * @param d the array representing the distance matrix, see R_distance above
+ * @param x the matrix this is filling
+ * @param n the size attribute (number of rows and columns of x)
+ */
+void as_matrix_dist(double* d, double*x, int* n){
+   int i,j,k;
+   int s = *n;
+   double element;
+   for( i=0,k=0; i<s; i++){
+       x[i+s*i] = 0.0 ;
+       for( j=i+1; j<s; j++,k++){
+           element = d[k] ;
+           x[ i + s*j ] = element ;
+           x[ j + s*i ] = element ;
+       }
+   }
+}
+
Index: R/dist.R
===================================================================
--- R/dist.R	(revision 48386)
+++ R/dist.R	(working copy)
@@ -51,13 +51,16 @@
 
 format.dist <- function(x, ...) format(as.vector(x), ...)
 
-as.matrix.dist <- function(x, ...)
-{
-    size <- attr(x, "Size")
-    df <- matrix(0, size, size)
-    df[row(df) > col(df)] <- x
-    df <- df + t(df)
-    labels <- attr(x, "Labels")
+as.matrix.dist <- function(x, ...){
+    size <- as.integer( attr(x, "Size") )
+	storage.mode(x) <- "numeric"
+	if( size*(size-1L)/2L != length(x) ){
+		stop( "the `Size` attribute does not match the size of the distance matrix" )
+	}
+    df <- .C( "as_matrix_dist", d=x, x=numeric(size*size), 
+		n=as.integer(size), PACKAGE = "stats", DUP = FALSE )$x
+	dim(df) <- c(size,size)
+	labels <- attr(x, "Labels")
     dimnames(df) <-
 	if(is.null(labels)) list(1L:size,1L:size) else list(labels,labels)
     df
______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to