I believe the performance of isUnsorted() in sort.c could be improved by calling REAL() once (outside of the for loop), rather than calling it twice inside the loop. As an aside, it is implemented in the faster way in doSort() (sort.c line 401). The example below shows the performance improvement for a vectors of double of moving REAL() outside the for loop.
# example as implemented in isUnsorted body = " R_xlen_t n, i; n = XLENGTH(x); for(i = 0; i+1 < n ; i++) if(REAL(x)[i] > REAL(x)[i+1]) return ScalarLogical(TRUE); return ScalarLogical(FALSE);"; f1 = inline::cfunction(sig = signature(x='numeric'), body=body) # example updated with only one call to REAL() body = " R_xlen_t n, i; n = XLENGTH(x); double* real_x = REAL(x); for(i = 0; i+1 < n ; i++) if(real_x[i] > real_x[i+1]) return ScalarLogical(TRUE); return ScalarLogical(FALSE);"; f2 = inline::cfunction(sig = signature(x='numeric'), body=body) # unsorted x.double = as.double(1:1e7) + 0 x.posixct = Sys.time() + x.double microbenchmark::microbenchmark( f1(x.double), f2(x.double), # faster due to one REAL() f1(x.posixct), f2(x.posixct), # faster due to one REAL() unit='ms', times=10) Unit: milliseconds expr min lq mean median uq max neval f1(x.double) 35.737629 37.991785 43.004432 38.575525 39.198533 80.85625 10 f2(x.double) 6.053373 6.064323 7.238750 6.092453 8.438550 10.69384 10 f1(x.posixct) 36.315705 36.542253 42.349745 38.355395 39.378262 81.59857 10 f2(x.posixct) 6.063946 6.070741 7.579176 6.138518 7.063024 13.94141 10 I would also like to suggest ALTREP support for POSIXct vectors, which are interpreted as type REAL in the c code, but do not gain the performance benefits of real vectors. Sorted vectors of timestamps are important for joining time series and in calls to findInterval(). # unsorted vectors x.double = as.double(1:1e7) + 0 x.posixct = Sys.time() + x.double # sort for altrep benefit x.double.sort <- sort(x.double) x.posixct.sort <- sort(x.posixct) microbenchmark::microbenchmark( is.unsorted(x.double), is.unsorted(x.double.sort), # faster due to altrep is.unsorted(x.posixct), is.unsorted(x.posixct.sort), # no altrep benefit unit='ms', times=10) Unit: milliseconds expr min lq mean median uq max neval is.unsorted(x.double) 16.987730 17.010008 17.1577173 17.0862785 17.308674 17.474432 10 is.unsorted(x.double.sort) 0.000378 0.000756 0.0065327 0.0075525 0.010195 0.011706 10 is.unsorted(x.posixct) 36.925876 37.084837 43.4125593 37.4695915 41.858589 78.742174 10 is.unsorted(x.posixct.sort) 36.966654 37.031975 51.1228686 37.1235380 37.777319 153.270170 10 Since there do not appear to be any tests for is.unsorted() these are some tests to be added for some types. # integer sequence x <- -10L:10L stopifnot(!is.unsorted(x, na.rm = F, strictly = T)) stopifnot(!is.unsorted(x, na.rm = F, strictly = F)) # integer not strictly x <- -10L:10L x[2] <- x[3] stopifnot( is.unsorted(x, na.rm = F, strictly = T)) stopifnot(!is.unsorted(x, na.rm = F, strictly = F)) # integer with NA x <- -10L:10L x[2] <- NA stopifnot(!is.unsorted(x, na.rm = T, strictly = F)) stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F))) # double x <- seq(from = -10, to = 10, by=0.01) stopifnot(!is.unsorted(x, na.rm = F, strictly = T)) stopifnot(!is.unsorted(x, na.rm = F, strictly = F)) # double not strictly x <- seq(from = -10, to = 10, by=0.01) x[2] <- x[3] stopifnot( is.unsorted(x, na.rm = F, strictly = T)) stopifnot(!is.unsorted(x, na.rm = F, strictly = F)) # double with NA x <- seq(from = -10, to = 10, by=0.01) x[length(x)] <- NA stopifnot(!is.unsorted(x, na.rm = T, strictly = F)) stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F))) # logical stopifnot(!is.unsorted( c(F, T, T), strictly = F)) stopifnot( is.unsorted( c(F, T, T), strictly = T)) stopifnot( is.unsorted( c(T, T, F), strictly = F)) stopifnot( is.unsorted( c(T, T, F), strictly = T)) # POSIXct x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day') stopifnot(!is.unsorted(x, na.rm = T, strictly = F)) stopifnot(!is.unsorted(x, na.rm = F, strictly = F)) # POSIXct not strictly x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day') x[2] <- x[3] stopifnot( is.unsorted(x, na.rm = F, strictly = T)) stopifnot(!is.unsorted(x, na.rm = F, strictly = F)) # POSIXct with NA x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day') x[length(x)] <- NA stopifnot(!is.unsorted(x, na.rm = T, strictly = F)) stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F))) [[alternative HTML version deleted]] ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel