Hi, I found the init.c file I did not see before.
> m <- expand.grid( x = 1:20, y = 1:20, z = 1:20 ) > d <- dist( m ) > system.time( out <- as.matrix( d ) ) user system elapsed 3.006 1.252 4.429 > old.as.matrix.dist <- function(x, ...) + { + size <- attr(x, "Size") + df <- matrix(0, size, size) + df[row(df) > col(df)] <- x + df <- df + t(df) + labels <- attr(x, "Labels") + dimnames(df) <- + if(is.null(labels)) list(1L:size,1L:size) else list(labels,labels) + df + } > system.time( out <- old.as.matrix.dist( d ) ) user system elapsed 17.471 2.964 21.304 Romain Romain Francois wrote:
Hello, I am trying to patch as.matrix.dist to achieve some speedup. > m <- expand.grid( x = 1:20, y = 1:20, z = 1:20 ) > d <- dist( m ) > system.time( out <- stats:::as.matrix.dist( d ) ) user system elapsed 15.355 3.110 19.123 > system.time( out <- as.matrix.dist( d ) ) user system elapsed 3.153 0.480 3.782
-- Romain Francois Independent R Consultant +33(0) 6 28 91 30 30 http://romainfrancois.blog.free.fr
Index: src/stats.h =================================================================== --- src/stats.h (revision 48386) +++ src/stats.h (working copy) @@ -52,4 +52,5 @@ void F77_NAME(lminfl)(double *x, int *ldx, int *n, int *k, int *docoef, double *qraux, double *resid, double *hat, double *coef, double *sigma, double *tol); +void as_matrix_dist(double*d, double*x, int*n) ; #endif Index: src/init.c =================================================================== --- src/init.c (revision 48386) +++ src/init.c (working copy) @@ -32,6 +32,7 @@ #include <R_ext/Rdynload.h> #include <R_ext/Visibility.h> +static R_NativePrimitiveArgType as_matrix_dist_t[3] = {REALSXP, REALSXP,INTSXP}; static R_NativePrimitiveArgType chisqsim_t[11] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, REALSXP, INTSXP, REALSXP}; static R_NativePrimitiveArgType fishersim_t[10] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, @@ -91,6 +92,7 @@ #define FDEF(name) {#name, (DL_FUNC) &F77_SUB(name), sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t} static const R_CMethodDef CEntries[] = { + {"as_matrix_dist", (DL_FUNC) &as_matrix_dist, 3, as_matrix_dist_t}, {"chisqsim", (DL_FUNC) &chisqsim, 11, chisqsim_t}, {"fisher_sim", (DL_FUNC) &fisher_sim, 10, fishersim_t}, {"d2x2xk", (DL_FUNC) &d2x2xk, 5, d2_t}, Index: src/distance.c =================================================================== --- src/distance.c (revision 48386) +++ src/distance.c (working copy) @@ -230,3 +230,25 @@ d[ij++] = (*method != MINKOWSKI) ? distfun(x, *nr, *nc, i, j) : R_minkowski(x, *nr, *nc, i, j, *p); } + +/** + * called by as.matrix.dist + * + * @param d the array representing the distance matrix, see R_distance above + * @param x the matrix this is filling + * @param n the size attribute (number of rows and columns of x) + */ +void as_matrix_dist(double* d, double*x, int* n){ + int i,j,k; + int s = *n; + double element; + for( i=0,k=0; i<s; i++){ + x[i+s*i] = 0.0 ; + for( j=i+1; j<s; j++,k++){ + element = d[k] ; + x[ i + s*j ] = element ; + x[ j + s*i ] = element ; + } + } +} + Index: R/dist.R =================================================================== --- R/dist.R (revision 48386) +++ R/dist.R (working copy) @@ -51,13 +51,16 @@ format.dist <- function(x, ...) format(as.vector(x), ...) -as.matrix.dist <- function(x, ...) -{ - size <- attr(x, "Size") - df <- matrix(0, size, size) - df[row(df) > col(df)] <- x - df <- df + t(df) - labels <- attr(x, "Labels") +as.matrix.dist <- function(x, ...){ + size <- as.integer( attr(x, "Size") ) + storage.mode(x) <- "numeric" + if( size*(size-1L)/2L != length(x) ){ + stop( "the `Size` attribute does not match the size of the distance matrix" ) + } + df <- .C( "as_matrix_dist", d=x, x=numeric(size*size), + n=as.integer(size), PACKAGE = "stats", DUP = FALSE )$x + dim(df) <- c(size,size) + labels <- attr(x, "Labels") dimnames(df) <- if(is.null(labels)) list(1L:size,1L:size) else list(labels,labels) df
______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel