Hi,

Following is version 4 of the patch proposed for master to fix PR104116. 
Bootstrapped/regtested on powerpc64le and x86_64 on linux. Kindly review.

I found some issues with ceil and round division, which I believe are
pre-existing.
For CEIL_DIV_EXPR and ROUND_DIV_EXPR in the unsigned case, when operand1
(a constant) is 2 or another power of 2, some shift/bitwise optimizations are
done in the folding phase (match.pd), so the division never reaches the pattern
implemented here to add the compensation code. Therefore the following 4 test
cases fail:
1. pr104116-ceil-udiv-2.c
2. pr104116-ceil-udiv-pow2.c
3. pr104116-round-udiv-2.c
4. pr104116-round-udiv-pow2.c

Can I create a new PR for this in bugzilla?

Thanks and regards,
Avinash Jayakar

Changes from v3:
        - Add implementation for {ceil,round}_{div,mod} for signed and unsigned.
        - Split test cases into individual files. 
        - Added runtime check to verify functionality of the transformation.
        - Added 30 tests [2(rd,cl)*3(paths)*2(types)*2(div,mod) 
        + 3(paths)*2(div,mod) = 30] 
Changes from v2:
        - Correct all formatting
        - Update test case with a main function and remove powerpc
          specific option
        - Added null initialization for pattern_stmt and def_stmt in
          vect_recog_divmod_pattern
Changes from v1:
        - Added new tests for checking vectorization of FLOOR_{DIV,MOD}
          for multiple paths.
        - Incorporated review comments to use proper vector masks and
          checks for if the target supports generated code.

Added a new helper function "add_code_for_floorceilround_divmod" in
tree-vect-patterns.cc for adding compensating code for each of the ops
{FLOOR,ROUND,CEIL}_{DIV,MOD}_EXPR. This function checks whether the target
supports all the operations required to implement these ops and generates
vectorized code for the respective operations, based on the following logic:
FLOOR_{DIV,MOD}
        r = x %[fl] y; 
        r = x % y; if (r && (x ^ y) < 0) r += y;
        r = x/[fl] y;
        r = x % y; d = x/y; if (r && (x ^ y) < 0) d--;
CEIL_{DIV,MOD} (unsigned)
        r = x %[cl] y;
        r = x % y; if (r) r -= y;
        r = x/[cl] y;
        r = x % y; d = x/y; if (r) d++;
CEIL_{DIV,MOD} (signed)
        r = x %[cl] y; 
        r = x % y; if (r && (x ^ y) >= 0) r -= y;
        r = x/[cl] y;
        r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
ROUND_{DIV,MOD} (unsigned)
        r = x %[rd] y;
        r = x % y; if (r > ((y-1)/2)) r -= y;
        r = x/[rd] y;
        r = x % y; d = x/y; if (r > ((y-1)/2)) d++;
ROUND_{DIV,MOD} (signed)
        r = x %[rd] y; 
        r = x % y; if (r > ((y-1)/2)) 
                {if ((x ^ y) >= 0) r -= y; else r += y;}
        r = x/[rd] y;
        r = x % y; d = x/y; if (r > ((y-1)/2))
                {if ((x ^ y) >= 0) d++; else d--;}
Each of these cases is implemented in a vectorized form.
This function is then called in each of the paths in vect_recog_divmod_pattern,
of which there are 3, based on the value of the constant operand1:
1. == 2
2. == power of 2
3. otherwise

2025-10-08  Avinash Jayakar  <[email protected]>

gcc/ChangeLog:
        PR vect/104116
        * tree-vect-patterns.cc (add_code_for_floorceilround_divmod): New
        function to add compensating code for
        {FLOOR,ROUND,CEIL}_{DIV,MOD}_EXPR.
        (vect_recog_divmod_pattern): Call add_code_for_floorceilround_divmod
        after computing div/mod for each control path.

gcc/testsuite/ChangeLog:
        PR vect/104116
        * gcc.dg/vect/pr104116-ceil-div-2.c: New test.
        * gcc.dg/vect/pr104116-ceil-div-pow2.c: New test.
        * gcc.dg/vect/pr104116-ceil-div.c: New test.
        * gcc.dg/vect/pr104116-ceil-mod-2.c: New test.
        * gcc.dg/vect/pr104116-ceil-mod-pow2.c: New test.
        * gcc.dg/vect/pr104116-ceil-mod.c: New test.
        * gcc.dg/vect/pr104116-ceil-udiv-2.c: New test.
        * gcc.dg/vect/pr104116-ceil-udiv-pow2.c: New test.
        * gcc.dg/vect/pr104116-ceil-udiv.c: New test.
        * gcc.dg/vect/pr104116-ceil-umod-2.c: New test.
        * gcc.dg/vect/pr104116-ceil-umod-pow2.c: New test.
        * gcc.dg/vect/pr104116-ceil-umod.c: New test.
        * gcc.dg/vect/pr104116-floor-div-2.c: New test.
        * gcc.dg/vect/pr104116-floor-div-pow2.c: New test.
        * gcc.dg/vect/pr104116-floor-div.c: New test.
        * gcc.dg/vect/pr104116-floor-mod-2.c: New test.
        * gcc.dg/vect/pr104116-floor-mod-pow2.c: New test.
        * gcc.dg/vect/pr104116-floor-mod.c: New test.
        * gcc.dg/vect/pr104116-round-div-2.c: New test.
        * gcc.dg/vect/pr104116-round-div-pow2.c: New test.
        * gcc.dg/vect/pr104116-round-div.c: New test.
        * gcc.dg/vect/pr104116-round-mod-2.c: New test.
        * gcc.dg/vect/pr104116-round-mod-pow2.c: New test.
        * gcc.dg/vect/pr104116-round-mod.c: New test.
        * gcc.dg/vect/pr104116-round-udiv-2.c: New test.
        * gcc.dg/vect/pr104116-round-udiv-pow2.c: New test.
        * gcc.dg/vect/pr104116-round-udiv.c: New test.
        * gcc.dg/vect/pr104116-round-umod-2.c: New test.
        * gcc.dg/vect/pr104116-round-umod-pow2.c: New test.
        * gcc.dg/vect/pr104116-round-umod.c: New test.
        * gcc.dg/vect/pr104116.h: New test.

---
 .../gcc.dg/vect/pr104116-ceil-div-2.c         |  29 ++
 .../gcc.dg/vect/pr104116-ceil-div-pow2.c      |  30 ++
 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c |  30 ++
 .../gcc.dg/vect/pr104116-ceil-mod-2.c         |  30 ++
 .../gcc.dg/vect/pr104116-ceil-mod-pow2.c      |  30 ++
 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c |  30 ++
 .../gcc.dg/vect/pr104116-ceil-udiv-2.c        |  29 ++
 .../gcc.dg/vect/pr104116-ceil-udiv-pow2.c     |  29 ++
 .../gcc.dg/vect/pr104116-ceil-udiv.c          |  29 ++
 .../gcc.dg/vect/pr104116-ceil-umod-2.c        |  30 ++
 .../gcc.dg/vect/pr104116-ceil-umod-pow2.c     |  30 ++
 .../gcc.dg/vect/pr104116-ceil-umod.c          |  30 ++
 .../gcc.dg/vect/pr104116-floor-div-2.c        |  30 ++
 .../gcc.dg/vect/pr104116-floor-div-pow2.c     |  30 ++
 .../gcc.dg/vect/pr104116-floor-div.c          |  30 ++
 .../gcc.dg/vect/pr104116-floor-mod-2.c        |  31 ++
 .../gcc.dg/vect/pr104116-floor-mod-pow2.c     |  31 ++
 .../gcc.dg/vect/pr104116-floor-mod.c          |  31 ++
 .../gcc.dg/vect/pr104116-round-div-2.c        |  31 ++
 .../gcc.dg/vect/pr104116-round-div-pow2.c     |  31 ++
 .../gcc.dg/vect/pr104116-round-div.c          |  31 ++
 .../gcc.dg/vect/pr104116-round-mod-2.c        |  31 ++
 .../gcc.dg/vect/pr104116-round-mod-pow2.c     |  31 ++
 .../gcc.dg/vect/pr104116-round-mod.c          |  31 ++
 .../gcc.dg/vect/pr104116-round-udiv-2.c       |  31 ++
 .../gcc.dg/vect/pr104116-round-udiv-pow2.c    |  31 ++
 .../gcc.dg/vect/pr104116-round-udiv.c         |  32 ++
 .../gcc.dg/vect/pr104116-round-umod-2.c       |  31 ++
 .../gcc.dg/vect/pr104116-round-umod-pow2.c    |  31 ++
 .../gcc.dg/vect/pr104116-round-umod.c         |  31 ++
 gcc/testsuite/gcc.dg/vect/pr104116.h          | 201 ++++++++++
 gcc/tree-vect-patterns.cc                     | 368 +++++++++++++++++-
 32 files changed, 1465 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-div.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116.h

diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c
new file mode 100644
index 00000000000..7078776a577
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __CEIL_DIV by 2; must match cl_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__CEIL_DIV, 2, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = cl_div (i - N / 2, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c
new file mode 100644
index 00000000000..7aa9ae84627
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __CEIL_DIV by 8; must match cl_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN (__CEIL_DIV, 8, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = cl_div (i - N / 2, 8);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c
new file mode 100644
index 00000000000..6f903ffda92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __CEIL_DIV by 19; must match cl_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN (__CEIL_DIV, 19, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = cl_div (i - N / 2, 19);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c
new file mode 100644
index 00000000000..ee6dfb92de9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __CEIL_MOD by 2; must match cl_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN (__CEIL_MOD, 2, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = cl_mod (i - N / 2, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c
new file mode 100644
index 00000000000..de409ea349a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __CEIL_MOD by 8; must match cl_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN (__CEIL_MOD, 8, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = cl_mod (i - N / 2, 8);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c
new file mode 100644
index 00000000000..f2ba9367461
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __CEIL_MOD by 19; must match cl_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN (__CEIL_MOD, 19, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = cl_mod (i - N / 2, 19);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c
new file mode 100644
index 00000000000..db1f797c1da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __CEIL_DIV by 2; must match cl_udiv.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED (__CEIL_DIV, 2u, udiv)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *ua = (unsigned int *) &uarr;
+  init_uarr (ua, N);
+  udiv (ua);
+  for (int i = 0; i < N; i++)
+    {
+      unsigned int expected = cl_udiv (0xf0000000 + i, 2);
+      if (expected != ua[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c
new file mode 100644
index 00000000000..06b4257d58f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __CEIL_DIV by 8; must match cl_udiv.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED (__CEIL_DIV, 8u, udiv)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *ua = (unsigned int *) &uarr;
+  init_uarr (ua, N);
+  udiv (ua);
+  for (int i = 0; i < N; i++)
+    {
+      unsigned int expected = cl_udiv (0xf0000000 + i, 8);
+      if (expected != ua[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c
new file mode 100644
index 00000000000..ef6e8563ce0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __CEIL_DIV by 19; must match cl_udiv.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED (__CEIL_DIV, 19u, udiv)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *ua = (unsigned int *) &uarr;
+  init_uarr (ua, N);
+  udiv (ua);
+  for (int i = 0; i < N; i++)
+    {
+      unsigned int expected = cl_udiv (0xf0000000 + i, 19);
+      if (expected != ua[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c
new file mode 100644
index 00000000000..2d0a5dbaf77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __CEIL_MOD by 2; must match cl_umod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN_UNSIGNED (__CEIL_MOD, 2u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int *) &uarr;
+  init_uarr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      unsigned int expected = cl_umod (0xf0000000 + i, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c
new file mode 100644
index 00000000000..2d0a5dbaf77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __CEIL_MOD by 8; must match cl_umod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN_UNSIGNED (__CEIL_MOD, 8u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int *) &uarr;
+  init_uarr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      unsigned int expected = cl_umod (0xf0000000 + i, 8);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c
new file mode 100644
index 00000000000..949a5091e36
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __CEIL_MOD by 19; must match cl_umod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN_UNSIGNED (__CEIL_MOD, 19u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int *) &uarr;
+  init_uarr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      unsigned int expected = cl_umod (0xf0000000 + i, 19);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c
new file mode 100644
index 00000000000..d93e0513984
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __FLOOR_DIV by 2; must match fl_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__FLOOR_DIV, 2, div_2)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div_2 (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = fl_div (i - N / 2, 2);
+      if (expected != a[i])
+        abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c
new file mode 100644
index 00000000000..9e986a75dd1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __FLOOR_DIV by 8; must match fl_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__FLOOR_DIV, 8, div_2)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div_2 (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = fl_div (i - N / 2, 8);
+      if (expected != a[i])
+        abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c
new file mode 100644
index 00000000000..89dd270364c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __FLOOR_DIV by 19; must match fl_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__FLOOR_DIV, 19, div_2)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div_2 (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = fl_div (i - N / 2, 19);
+      if (expected != a[i])
+        abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c
new file mode 100644
index 00000000000..0c5c1621e0d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __FLOOR_MOD by 2; must match fl_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__FLOOR_MOD, 2, mod)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = fl_mod (i - N / 2, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c
new file mode 100644
index 00000000000..f3de1450000
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __FLOOR_MOD by 8; must match fl_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__FLOOR_MOD, 8, mod)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = fl_mod (i - N / 2, 8);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c
new file mode 100644
index 00000000000..3e6bbe978b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __FLOOR_MOD by 19; must match fl_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__FLOOR_MOD, 19, mod)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = fl_mod (i - N / 2, 19);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c
new file mode 100644
index 00000000000..c242ccb9b61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __ROUND_DIV by 2; must match rd_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__ROUND_DIV, 2, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = rd_div (i - N / 2, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c
new file mode 100644
index 00000000000..365c2c59866
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __ROUND_DIV by 8; must match rd_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__ROUND_DIV, 8, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = rd_div (i - N / 2, 8);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c
new file mode 100644
index 00000000000..5c377d118ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __ROUND_DIV by 19; must match rd_div.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__ROUND_DIV, 19, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  div (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = rd_div (i - N / 2, 19);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c
new file mode 100644
index 00000000000..6430b3ea9ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __ROUND_MOD by 2; must match rd_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__ROUND_MOD, 2, mod)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = rd_mod (i - N / 2, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c
new file mode 100644
index 00000000000..46c1789e939
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __ROUND_MOD by 8; must match rd_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__ROUND_MOD, 8, mod)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = rd_mod (i - N / 2, 8);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c
new file mode 100644
index 00000000000..e7ca44e2f84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize signed __ROUND_MOD by 19; must match rd_mod.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN (__ROUND_MOD, 19, mod)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int *) &arr;
+  init_arr (a, N);
+  mod (a);
+  for (int i = 0; i < N; i++)
+    {
+      int expected = rd_mod (i - N / 2, 19);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c
new file mode 100644
index 00000000000..4d42f4e3c02
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize unsigned __ROUND_DIV by 2; must match rd_udiv.  */
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED (__ROUND_DIV, 2u, div)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int *) &uarr;
+  init_uarr (a, N);
+  div (a);
+  for (unsigned int i = 0; i < N; i++)
+    {
+      unsigned int expected = rd_udiv (0xf0000000 + i, 2);
+      if (expected != a[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c 
b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c
new file mode 100644
index 00000000000..137b249dc44
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_DIV, 8u, div)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *vals = (unsigned int *) &uarr;
+  init_uarr (vals, N);
+  div (vals);
+  for (unsigned int idx = 0; idx < N; idx++)
+    {
+      /* Element IDX was seeded with 0xf0000000 + IDX by init_uarr.  */
+      if (vals[idx] != rd_udiv (0xf0000000 + idx, 8))
+       abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c
new file mode 100644
index 00000000000..183a930aef9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c
@@ -0,0 +1,32 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_DIV, 19u, div)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *vals = (unsigned int *) &uarr;
+  init_uarr (vals, N);
+  div (vals);
+
+  for (unsigned int idx = 0; idx < N; idx++)
+    {
+      /* Element IDX was seeded with 0xf0000000 + IDX by init_uarr.  */
+      if (vals[idx] != rd_udiv (0xf0000000 + idx, 19))
+       abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c
new file mode 100644
index 00000000000..f321e0e5c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_MOD, 2u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *vals = (unsigned int *) &uarr;
+  init_uarr (vals, N);
+  mod (vals);
+  for (unsigned int idx = 0; idx < N; idx++)
+    {
+      /* Element IDX was seeded with 0xf0000000 + IDX by init_uarr.  */
+      if (vals[idx] != rd_umod (0xf0000000 + idx, 2))
+       abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c
new file mode 100644
index 00000000000..041ecd17f56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_MOD, 8u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *vals = (unsigned int *) &uarr;
+  init_uarr (vals, N);
+  mod (vals);
+  for (unsigned int idx = 0; idx < N; idx++)
+    {
+      /* Element IDX was seeded with 0xf0000000 + IDX by init_uarr.  */
+      if (vals[idx] != rd_umod (0xf0000000 + idx, 8))
+       abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c
new file mode 100644
index 00000000000..b5ddad1d472
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+
+
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_MOD, 19u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *vals = (unsigned int *) &uarr;
+  init_uarr (vals, N);
+  mod (vals);
+  for (unsigned int idx = 0; idx < N; idx++)
+    {
+      /* Element IDX was seeded with 0xf0000000 + IDX by init_uarr.  */
+      if (vals[idx] != rd_umod (0xf0000000 + idx, 19))
+       abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116.h b/gcc/testsuite/gcc.dg/vect/pr104116.h
new file mode 100644
index 00000000000..03fbc5f764b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116.h
@@ -0,0 +1,201 @@
+#define TEST_FN(OP, CONST, NAME) /* GIMPLE FE loop: a[i] = a[i] OP CONST */ \
+__attribute__((noinline)) \
+void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (int * a) \
+{ \
+  int i; \
+  long unsigned int _1; \
+  long unsigned int _2; \
+  int * _3; \
+  int _4; \
+  int _5; \
+  unsigned int _12; \
+  unsigned int _13; \
+ \
+  __BB(2,guessed_local(10737416)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(3,loop_header(1),guessed_local(1063004408)): \
+  i_14 = __PHI (__BB5: i_11, __BB2: 0); /* element index */ \
+  _13 = __PHI (__BB5: _12, __BB2: 1024u); /* iterations left */ \
+  _1 = (long unsigned int) i_14; \
+  _2 = _1 * 4ul; /* byte offset; NOTE(review): assumes 4-byte int */ \
+  _3 = a_9(D) + _2; \
+  _4 = __MEM <int> (_3); \
+  _5 = _4 OP CONST; /* the operation under test */ \
+  __MEM <int> (_3) = _5; \
+  i_11 = i_14 + 1; \
+  _12 = _13 - 1u; \
+  if (_12 != 0u) \
+    goto __BB5(guessed(132861994)); \
+  else \
+    goto __BB4(guessed(1355734)); \
+ \
+  __BB(5,guessed_local(1052266995)): /* back edge to the loop header */ \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(4,guessed_local(10737416)): \
+  return; \
+ \
+} \
+
+#define TEST_FN_UNSIGNED(OP, CONST, NAME) /* unsigned a[i] = a[i] OP CONST */ \
+__attribute__((noinline)) \
+void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (unsigned int * a) \
+{ \
+  int i; \
+  long unsigned int _1; \
+  long unsigned int _2; \
+  unsigned int * _3; \
+  unsigned int _4; \
+  unsigned int _5; \
+  unsigned int _12; \
+  unsigned int _13; \
+ \
+  __BB(2,guessed_local(10737416)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(3,loop_header(1),guessed_local(1063004408)): \
+  i_14 = __PHI (__BB5: i_11, __BB2: 0); /* element index */ \
+  _13 = __PHI (__BB5: _12, __BB2: 1024u); /* iterations left */ \
+  _1 = (long unsigned int) i_14; \
+  _2 = _1 * 4ul; /* byte offset; NOTE(review): assumes 4-byte int */ \
+  _3 = a_9(D) + _2; \
+  _4 = __MEM <unsigned int> (_3); \
+  _5 = _4 OP CONST; /* the operation under test */ \
+  __MEM <unsigned int> (_3) = _5; \
+  i_11 = i_14 + 1; \
+  _12 = _13 - 1u; \
+  if (_12 != 0u) \
+    goto __BB5(guessed(132861994)); \
+  else \
+    goto __BB4(guessed(1355734)); \
+ \
+  __BB(5,guessed_local(1052266995)): /* back edge to the loop header */ \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(4,guessed_local(10737416)): \
+  return; \
+} \
+
+
+#define N 1024
+int arr[N];
+__attribute__((optimize("O0")))
+void init_arr (int *a, int n) /* Seed A[0..n) with -(n/2), -(n/2)+1, ...  */
+{
+  for (int idx = 0, val = -(n / 2); idx < n; idx++, val++)
+    a[idx] = val;
+}
+
+unsigned int uarr[N];
+__attribute__((optimize("O0")))
+void init_uarr (unsigned int *a, int n) /* a[i] = 0xf0000000 + i, 0 <= i < n */
+{
+  for (int i = 0; i < n; i++)  /* signed index: a negative N fills nothing */
+    a[i] = 0xf0000000 + (unsigned int) i;
+}
+
+/* Scalar model of ceiling division: x / y rounded towards +infinity.  */
+int cl_div (int x, int y)
+{
+  const int trunc_q = x / y;
+  const int inexact = (x % y) != 0;
+  const int same_sign = (x ^ y) >= 0;
+  return trunc_q + (inexact && same_sign);
+}
+
+/* Unsigned ceiling division: bump the quotient when a remainder is left.  */
+unsigned int cl_udiv (unsigned int x, unsigned int y)
+{
+  unsigned int quot = x / y;
+
+  quot += (x % y != 0);
+  return quot;
+}
+
+/* Remainder paired with ceiling division: x - y * ceil (x / y).  */
+int cl_mod (int x, int y)
+{
+  const int rem = x % y;
+  const int adjust = rem != 0 && (x ^ y) >= 0;
+  return adjust ? rem - y : rem;
+}
+
+unsigned int cl_umod (unsigned int x, unsigned int y)
+{
+  unsigned int r = x % y;
+  /* Non-zero remainder: ceiling bumped the quotient, so r wraps by -y.  */
+  if (r > 0)
+    r -= y;
+  return r;
+}
+
+/* Scalar model of floor division: x / y rounded towards -infinity.  */
+int fl_div (int x, int y)
+{
+  const int trunc_q = x / y;
+  const int inexact = (x % y) != 0;
+  const int opposite_sign = (x ^ y) < 0;
+  return trunc_q - (inexact && opposite_sign);
+}
+
+
+/* Remainder paired with floor division: x - y * floor (x / y).  */
+int fl_mod (int x, int y)
+{
+  const int rem = x % y;
+  const int adjust = rem != 0 && (x ^ y) < 0;
+  return adjust ? rem + y : rem;
+}
+
+/* Local absolute value helper; -x overflows when x is INT_MIN.  */
+int abs (int x)
+{
+  return x < 0 ? -x : x;
+}
+
+/* Remainder paired with round-to-nearest division (ties away from zero).  */
+int rd_mod (int x, int y)
+{
+  int r = x % y;
+  /* Compare against (|y| - 1) / 2 so negative divisors round correctly.  */
+  if (abs (r) > ((abs (y) - 1) >> 1))
+    {
+      /* Opposite signs: quotient stepped down; same signs: stepped up.  */
+      r = ((x ^ y) < 0) ? r + y : r - y;
+    }
+  return r;
+}
+
+/* Round-to-nearest division, ties rounded away from zero.  */
+int rd_div (int x, int y)
+{
+  int r = x % y;
+  int q = x / y;
+  /* Compare against (|y| - 1) / 2 so negative divisors round correctly.  */
+  if (abs (r) > ((abs (y) - 1) >> 1))
+    {
+      /* Step the truncated quotient one unit away from zero.  */
+      q += ((x ^ y) < 0) ? -1 : 1;
+    }
+  return q;
+}
+
+/* Unsigned remainder paired with round-to-nearest division (may wrap).  */
+unsigned int rd_umod (unsigned int x, unsigned int y)
+{
+  const unsigned int rem = x % y;
+  const unsigned int half = (y - 1) >> 1;
+  return rem > half ? rem - y : rem;
+}
+
+/* Unsigned division rounded to nearest, exact halves rounded up.  */
+unsigned int rd_udiv (unsigned int x, unsigned int y)
+{
+  const unsigned int rem = x % y;
+  const unsigned int half = (y - 1) >> 1;
+
+  return x / y + (rem > half);
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 782327235db..673db2bc444 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4837,6 +4837,262 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
   return NULL;
 }
 
+/* Function add_code_for_floorceilround_divmod
+ *
+ * A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
+ * FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
+ * ROUND_DIV_EXPR.
+ * The truncating quotient and remainder are needed to implement them.
+ * FLOOR cases
+ * r = x %[fl] y; d = x /[fl] y;
+ * is
+ * r = x % y; if (r && (x ^ y) < 0) r += y;
+ * r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; respectively.
+ * Produce the following sequence
+ * v0 = x ^ y
+ * v1 = -r
+ * v2 = r | -r
+ * v3 = v0 & v2
+ * v4 = v3 < 0
+ * if (floor_mod)
+ *   v5 = v4 ? y : 0
+ *   v6 = r + v5
+ * if (floor_div)
+ *   v5 = v4 ? -1 : 0
+ *   v6 = d + v5
+ * Similar sequences of vector instructions are produced for following cases
+ * CEIL cases
+ * r = x %[cl] y; d = x /[cl] y;
+ * is
+ * r = x % y; if (r && (x ^ y) >= 0) r -= y;
+ * r = x % y; if (r) r -= y; (unsigned)
+ * r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
+ * r = x % y; d = x/y; if (r) d++; (unsigned)
+ * ROUND cases
+ * r = x %[rd] y; d = x /[rd] y;
+ * is
+ * r = x % y; if (|r| > (|y|-1)/2) if ((x ^ y) >= 0) r -= y; else r += y;
+ * r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
+ * r = x % y; d = x/y; if (|r| > (|y|-1)/2) if ((x ^ y) >= 0) d++; else d--;
+ * r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
+ * Inputs:
+ *   VECTYPE: Vector type of the operands
+ *   VINFO/STMT_VINFO: Vectorizer info and statement where pattern begins
+ *   RHS_CODE: One of {FLOOR,CEIL,ROUND}_{DIV,MOD}_EXPR
+ *   Q/R: Truncating quotient and remainder (Q is only read for DIV codes)
+ *   OPRND0/OPRND1: Actual operands involved
+ *   ITYPE: Scalar type used for the temporaries and constants
+ * Output:
+ *   NULL if vectorization not possible
+ *   Gimple statement based on rhs_code
+ */
+static gimple *
+add_code_for_floorceilround_divmod (tree vectype, vec_info* vinfo,
+  stmt_vec_info stmt_vinfo, enum tree_code rhs_code,
+  tree q, tree r, tree oprnd0, tree oprnd1, tree itype)
+{
+  gimple *def_stmt;
+  tree mask_vectype = truth_type_for (vectype);
+  if (!mask_vectype)
+    return NULL;
+  tree bool_cond;
+  bool unsigned_p = TYPE_UNSIGNED (itype);
+
+  switch (rhs_code)
+  {
+    case FLOOR_MOD_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_MOD_EXPR:
+    case CEIL_DIV_EXPR:
+    {
+      if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
+      || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+      || !target_has_vecop_for_code (MINUS_EXPR, vectype)
+      || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+      || !expand_vec_cond_expr_p (vectype, mask_vectype))
+       return NULL;
+      if (unsigned_p)
+      {
+       gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
+
+       if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
+         return NULL;
+       bool is_mod = rhs_code == CEIL_MOD_EXPR;
+       // r > 0
+       bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+       def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
+         build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
+         itype);
+
+       // (r > 0) ? y : 0 (mod)
+       // (r > 0) ? 1 : 0 (div)
+       tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
+         is_mod ? oprnd1 : build_int_cst (itype, 1), build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // r -= (r > 0) ? y : 0 (mod)
+       // d += (r > 0) ? 1 : 0 (div)
+       tree result = vect_recog_temp_ssa_var (itype, NULL);
+       return gimple_build_assign (result,
+         is_mod ? MINUS_EXPR : PLUS_EXPR, is_mod ? r : q, extr_cond);
+      }
+      else
+      {
+       bool ceil_p = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
+       if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
+           || (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype)))
+         return NULL;
+       // x ^ y
+       tree xort = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // ~(x ^ y) (ceil): sign bit set when x and y have the same sign
+       tree cond_reg = xort;
+       if (ceil_p)
+       {
+         cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
+         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+       }
+
+       // -r
+       tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // r | -r , sign bit is set if r!=0
+       tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // (x ^ y) & (r | -r), or ~(x ^ y) & (r | -r) for ceil
+       tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
+       r_or_negr, cond_reg);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
+       bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+       def_stmt = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
+         build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
+         itype);
+
+       // cond ? y : 0 (mod), cond being the comparison built above
+       // cond ? -1 : 0 (div)
+       bool is_mod = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
+       tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
+         is_mod ? oprnd1 : build_int_cst (itype, -1),
+         build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
+       // d += (x ^ y < 0 && r) ? -1 : 0 (floor div)
+       // r -= (x ^ y >= 0 && r) ? y : 0 (ceil mod)
+       // d -= (x ^ y >= 0 && r) ? -1 : 0 (ceil div)
+       tree result = vect_recog_temp_ssa_var (itype, NULL);
+       return gimple_build_assign (result,
+         (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+         ? PLUS_EXPR : MINUS_EXPR, is_mod ? r : q, extr_cond);
+      }
+    }
+    case ROUND_MOD_EXPR:
+    case ROUND_DIV_EXPR:
+    {
+      if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
+      || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+      || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+      || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
+      || !expand_vec_cond_expr_p (vectype, mask_vectype))
+       return NULL;
+
+      bool is_mod = rhs_code == ROUND_MOD_EXPR;
+      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
+      unsigned HOST_WIDE_INT abs_d = (unsigned_p || d >= 0)
+       ? (unsigned HOST_WIDE_INT) d : - (unsigned HOST_WIDE_INT) d;
+      unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
+      if (!unsigned_p)
+      {
+       // abs and xor are only needed by the signed sequence
+       if (!target_has_vecop_for_code (ABS_EXPR, vectype)
+           || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype))
+         return NULL;
+       tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // abs (r) > ((abs (y) - 1) >> 1)
+       tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+       def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
+         build_int_cst (itype, mid_d));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
+         itype);
+
+       // x ^ y
+       tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       // x ^ y < 0
+       bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+       def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
+         build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
+         itype);
+
+       // x ^ y < 0 ? y : -y (mod)
+       // x ^ y < 0 ? -1 : 1 (div)
+       tree val1 = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (val1, COND_EXPR, bool_cond,
+         build_int_cst (itype,  is_mod ? d : -1),
+         build_int_cst (itype, is_mod ? -d : 1));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+       int precision = TYPE_PRECISION (itype);
+       wide_int wmask = wi::mask (precision, false, precision);
+
+       // abs (r) > ((abs (y) - 1) >> 1) ? all-ones : 0
+       tree val2 = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
+         wide_int_to_tree (itype, wmask), build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       tree fval = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       tree result = vect_recog_temp_ssa_var (itype, NULL);
+       return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q, fval);
+      }
+      else
+      {
+       // r > (y-1 >> 1)
+       tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+       def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
+       build_int_cst (itype, mid_d));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
+         itype);
+
+       // (r > (y-1)>>1) ? -y : 0 (mod), (r > (y-1)>>1) ? 1 : 0 (div)
+       tree val2 = vect_recog_temp_ssa_var (itype, NULL);
+       def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
+         build_int_cst (itype, is_mod ? -d : 1), build_int_cst (itype, 0));
+       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+       tree result = vect_recog_temp_ssa_var (itype, NULL);
+       return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q, val2);
+      }
+    }
+    default:
+      return NULL;
+  }
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
@@ -4881,7 +5137,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
 {
   gimple *last_stmt = stmt_vinfo->stmt;
   tree oprnd0, oprnd1, vectype, itype, cond;
-  gimple *pattern_stmt, *def_stmt;
+  gimple *pattern_stmt = NULL;
+  gimple *def_stmt = NULL;
   enum tree_code rhs_code;
   optab optab;
   tree q, cst;
@@ -4898,6 +5155,12 @@ vect_recog_divmod_pattern (vec_info *vinfo,
     case TRUNC_DIV_EXPR:
     case EXACT_DIV_EXPR:
     case TRUNC_MOD_EXPR:
+    case FLOOR_MOD_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_MOD_EXPR:
+    case CEIL_DIV_EXPR:
+    case ROUND_MOD_EXPR:
+    case ROUND_DIV_EXPR:
       break;
     default:
       return NULL;
@@ -4931,7 +5194,9 @@ vect_recog_divmod_pattern (vec_info *vinfo,
   prec = TYPE_PRECISION (itype);
   if (integer_pow2p (oprnd1))
     {
-      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
+      if ((TYPE_UNSIGNED (itype)
+       && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
+       || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;
 
       /* Pattern detected.  */
@@ -4949,17 +5214,38 @@ vect_recog_divmod_pattern (vec_info *vinfo,
          gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
          gimple_call_set_lhs (div_stmt, var_div);
 
-         if (rhs_code == TRUNC_MOD_EXPR)
-           {
+    if (rhs_code == TRUNC_MOD_EXPR
+       || rhs_code == FLOOR_MOD_EXPR
+       || rhs_code == FLOOR_DIV_EXPR
+       || rhs_code == CEIL_MOD_EXPR
+       || rhs_code == CEIL_DIV_EXPR
+       || rhs_code == ROUND_MOD_EXPR
+       || rhs_code == ROUND_DIV_EXPR)
+      {
              append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
+             tree t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
-               = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
-                                      LSHIFT_EXPR, var_div, shift);
+               = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
              pattern_stmt
                = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
-                                      MINUS_EXPR, oprnd0,
-                                      gimple_assign_lhs (def_stmt));
+             MINUS_EXPR, oprnd0, t1);
+       if (rhs_code == FLOOR_MOD_EXPR
+           || rhs_code == FLOOR_DIV_EXPR
+           || rhs_code == CEIL_MOD_EXPR
+           || rhs_code == CEIL_DIV_EXPR
+           || rhs_code == ROUND_MOD_EXPR
+           || rhs_code == ROUND_DIV_EXPR)
+       {
+         /* T1 is q << shift; the remainder is the MINUS_EXPR result.  */
+         tree r = gimple_assign_lhs (pattern_stmt);
+         append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+         pattern_stmt = add_code_for_floorceilround_divmod (vectype, vinfo,
+          stmt_vinfo, rhs_code, var_div, r, oprnd0, oprnd1, itype);
+         if (pattern_stmt == NULL)
+           return NULL;
+       }
+
            }
          else
            pattern_stmt = div_stmt;
@@ -4973,8 +5259,12 @@ vect_recog_divmod_pattern (vec_info *vinfo,
                                      build_int_cst (itype, 0));
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
                              truth_type_for (vectype), itype);
+      tree div_result = NULL_TREE;
       if (rhs_code == TRUNC_DIV_EXPR
-         || rhs_code == EXACT_DIV_EXPR)
+       || rhs_code == EXACT_DIV_EXPR
+       || rhs_code == FLOOR_DIV_EXPR
+       || rhs_code == CEIL_DIV_EXPR
+       || rhs_code == ROUND_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
@@ -4991,12 +5281,24 @@ vect_recog_divmod_pattern (vec_info *vinfo,
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
          shift = build_int_cst (itype, tree_log2 (oprnd1));
+    div_result = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
-           = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+           = gimple_build_assign (div_result,
                                   RSHIFT_EXPR, var, shift);
        }
-      else
+    if (rhs_code == TRUNC_MOD_EXPR
+    || rhs_code == FLOOR_MOD_EXPR
+    || rhs_code == FLOOR_DIV_EXPR
+    || rhs_code == CEIL_DIV_EXPR
+    || rhs_code == CEIL_MOD_EXPR
+    || rhs_code == ROUND_MOD_EXPR
+    || rhs_code == ROUND_DIV_EXPR)
        {
+    if (rhs_code == FLOOR_DIV_EXPR
+      || rhs_code == CEIL_DIV_EXPR
+      || rhs_code == ROUND_DIV_EXPR)
+      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+
          tree signmask;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
@@ -5041,10 +5343,24 @@ vect_recog_divmod_pattern (vec_info *vinfo,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
+    tree r = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
-           = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
-                                  MINUS_EXPR, gimple_assign_lhs (def_stmt),
+           = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
+    if (rhs_code == FLOOR_MOD_EXPR
+      || rhs_code == FLOOR_DIV_EXPR
+      || rhs_code == CEIL_MOD_EXPR
+      || rhs_code == CEIL_DIV_EXPR
+      || rhs_code == ROUND_MOD_EXPR
+      || rhs_code == ROUND_DIV_EXPR)
+    {
+      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+      pattern_stmt = add_code_for_floorceilround_divmod (vectype, vinfo,
+      stmt_vinfo,
+      rhs_code, div_result, r, oprnd0, oprnd1, itype);
+      if (pattern_stmt == NULL)
+       return NULL;
+    }
        }
 
       return pattern_stmt;
@@ -5351,13 +5667,19 @@ vect_recog_divmod_pattern (vec_info *vinfo,
        }
     }
 
-  if (rhs_code == TRUNC_MOD_EXPR)
+  if (rhs_code == TRUNC_MOD_EXPR
+     || rhs_code == FLOOR_MOD_EXPR
+     || rhs_code == FLOOR_DIV_EXPR
+     || rhs_code == CEIL_MOD_EXPR
+     || rhs_code == CEIL_DIV_EXPR
+     || rhs_code == ROUND_MOD_EXPR
+     || rhs_code == ROUND_DIV_EXPR)
     {
       tree r, t1;
 
       /* We divided.  Now finish by:
-        t1 = q * oprnd1;
-        r = oprnd0 - t1;  */
+      t1 = q * oprnd1;
+      r = oprnd0 - t1;  */
       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
 
       t1 = vect_recog_temp_ssa_var (itype, NULL);
@@ -5366,6 +5688,20 @@ vect_recog_divmod_pattern (vec_info *vinfo,
 
       r = vect_recog_temp_ssa_var (itype, NULL);
       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+
+      if (rhs_code == FLOOR_MOD_EXPR
+      || rhs_code == FLOOR_DIV_EXPR
+      || rhs_code == CEIL_MOD_EXPR
+      || rhs_code == CEIL_DIV_EXPR
+      || rhs_code == ROUND_MOD_EXPR
+      || rhs_code == ROUND_DIV_EXPR)
+      {
+       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+       pattern_stmt = add_code_for_floorceilround_divmod (vectype, vinfo,
+    stmt_vinfo, rhs_code, q, r, oprnd0, oprnd1, itype);
+       if (pattern_stmt == NULL)
+         return NULL;
+      }
     }
 
   /* Pattern detected.  */
-- 
2.51.0

Reply via email to