Hi,
I found the init.c file I did not see before.
> m <- expand.grid( x = 1:20, y = 1:20, z = 1:20 )
> d <- dist( m )
> system.time( out <- as.matrix( d ) )
user system elapsed
3.006 1.252 4.429
> old.as.matrix.dist <- function(x, ...)
+ {
+ size <- attr(x, "Size")
+ df <- matrix(0, size, size)
+ df[row(df) > col(df)] <- x
+ df <- df + t(df)
+ labels <- attr(x, "Labels")
+ dimnames(df) <-
+ if(is.null(labels)) list(1L:size,1L:size) else list(labels,labels)
+ df
+ }
> system.time( out <- old.as.matrix.dist( d ) )
user system elapsed
17.471 2.964 21.304
Romain
Romain Francois wrote:
Hello,
I am trying to patch as.matrix.dist to achieve some speedup.
> m <- expand.grid( x = 1:20, y = 1:20, z = 1:20 )
> d <- dist( m )
> system.time( out <- stats:::as.matrix.dist( d ) )
user system elapsed
15.355 3.110 19.123
> system.time( out <- as.matrix.dist( d ) )
user system elapsed
3.153 0.480 3.782
--
Romain Francois
Independent R Consultant
+33(0) 6 28 91 30 30
http://romainfrancois.blog.free.fr
Index: src/stats.h
===================================================================
--- src/stats.h (revision 48386)
+++ src/stats.h (working copy)
@@ -52,4 +52,5 @@
void F77_NAME(lminfl)(double *x, int *ldx, int *n, int *k, int *docoef,
double *qraux, double *resid, double *hat,
double *coef, double *sigma, double *tol);
+void as_matrix_dist(double*d, double*x, int*n) ;
#endif
Index: src/init.c
===================================================================
--- src/init.c (revision 48386)
+++ src/init.c (working copy)
@@ -32,6 +32,7 @@
#include <R_ext/Rdynload.h>
#include <R_ext/Visibility.h>
+static R_NativePrimitiveArgType as_matrix_dist_t[3] = {REALSXP, REALSXP,INTSXP};
static R_NativePrimitiveArgType chisqsim_t[11] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP,
INTSXP, REALSXP, INTSXP, REALSXP, INTSXP, REALSXP};
static R_NativePrimitiveArgType fishersim_t[10] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP,
@@ -91,6 +92,7 @@
#define FDEF(name) {#name, (DL_FUNC) &F77_SUB(name), sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t}
static const R_CMethodDef CEntries[] = {
+ {"as_matrix_dist", (DL_FUNC) &as_matrix_dist, 3, as_matrix_dist_t},
{"chisqsim", (DL_FUNC) &chisqsim, 11, chisqsim_t},
{"fisher_sim", (DL_FUNC) &fisher_sim, 10, fishersim_t},
{"d2x2xk", (DL_FUNC) &d2x2xk, 5, d2_t},
Index: src/distance.c
===================================================================
--- src/distance.c (revision 48386)
+++ src/distance.c (working copy)
@@ -230,3 +230,25 @@
d[ij++] = (*method != MINKOWSKI) ?
distfun(x, *nr, *nc, i, j) : R_minkowski(x, *nr, *nc, i, j, *p);
}
+
+/**
+ * called by as.matrix.dist
+ *
+ * @param d the array representing the distance matrix, see R_distance above
+ * @param x the matrix this is filling
+ * @param n the size attribute (number of rows and columns of x)
+ */
+void as_matrix_dist(double* d, double*x, int* n){
+ int i,j,k;
+ int s = *n;
+ double element;
+ for( i=0,k=0; i<s; i++){
+ x[i+s*i] = 0.0 ;
+ for( j=i+1; j<s; j++,k++){
+ element = d[k] ;
+ x[ i + s*j ] = element ;
+ x[ j + s*i ] = element ;
+ }
+ }
+}
+
Index: R/dist.R
===================================================================
--- R/dist.R (revision 48386)
+++ R/dist.R (working copy)
@@ -51,13 +51,16 @@
format.dist <- function(x, ...) format(as.vector(x), ...)
-as.matrix.dist <- function(x, ...)
-{
- size <- attr(x, "Size")
- df <- matrix(0, size, size)
- df[row(df) > col(df)] <- x
- df <- df + t(df)
- labels <- attr(x, "Labels")
+as.matrix.dist <- function(x, ...){
+ size <- as.integer( attr(x, "Size") )
+ storage.mode(x) <- "numeric"
+ if( size*(size-1L)/2L != length(x) ){
+ stop( "the `Size` attribute does not match the size of the distance matrix" )
+ }
+ df <- .C( "as_matrix_dist", d=x, x=numeric(size*size),
+ n=as.integer(size), PACKAGE = "stats", DUP = FALSE )$x
+ dim(df) <- c(size,size)
+ labels <- attr(x, "Labels")
dimnames(df) <-
if(is.null(labels)) list(1L:size,1L:size) else list(labels,labels)
df
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel