https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87232

            Bug ID: 87232
           Summary: modular inverse not used for is-multiple test
                    optimizations
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: antoshkka at gmail dot com
  Target Milestone: ---

There's a paper on optimizing expressions like `return x % 679 == 0;`

http://duriansoftware.com/joe/Optimizing-is-multiple-checks-with-modular-arithmetic.html

Paper proposes to (if i'm not mistaken in calculations):
* replace `return x % A == 0;` with `return x * (modular inverse of A) <
(sizeof(x) * CHAR_BITS / A);` for odd A
* replace `return x % C == 0;` with `inv = x * (modular inverse of A); inv =
(inv >> B) | (inv << (sizeof(x) * CHAR_BITS - B)); return inv <=
(floor(sizeof(x) * CHAR_BITS / A) >> B);` for even C, where A is odd and C = (A
* pow(2, B))

Such tricks will allow to avoid imul and some mov instructions + use less
registers:

bool is_multiple_simple(unsigned x) {
  return x % 1738 == 0;
}

is_multiple_simple(unsigned int): # @is_multiple_simple(unsigned int)
  mov eax, edi
  mov ecx, 2530521583
  imul rcx, rax
  shr rcx, 42
  imul eax, ecx, 1738
  cmp edi, eax
  sete al
  ret


bool is_multiple_tuned(unsigned x) {
  unsigned inv = x * 148272749u;     // inverse of 869 mod 2**32
  inv = (inv >> 1u) | (inv << 31u);  // rotate by one bit
  return inv <= 2471212u;            // floor(2**32/869) >> 1
}

is_multiple_tuned(unsigned int): # @is_multiple_tuned(unsigned int)
  imul eax, edi, 148272749
  shrd eax, edi, 1
  cmp eax, 2471213
  setb al
  ret

Reply via email to