------- Comment #12 from potswa at mac dot com 2009-09-15 08:55 ------- Just for safekeeping, here's the body of the final code. On my machine it is much faster than the current revision: 2.2 sec for 100 iterations of rotating a 10-million-int vector 5 places left or right, versus 10.5 sec currently. It also doesn't use a separate gcd function; the computation is built in.
for ( ;; ) { // preconditions: range in [p, p + n), stride = k if ( __k * 2 < __n ) { _RandomAccessIterator __q = __p + __k; for ( _Distance __i = 0; __i < __n - __k; ++ __i ) { iter_swap( __p ++, __q ++ ); } __n = __n % __k; if ( __n == 0 ) return; swap( __n, __k ); __k = __n - __k; } else { __k = __n - __k; _RandomAccessIterator __q = __p + __n; __p = __q - __k; for ( _Distance __i = 0; __i < __n - __k; ++ __i ) { iter_swap( -- __p, -- __q ); } __n = __n % __k; if ( __n == 0 ) return; swap( __n, __k ); } } -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41351