diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_1.c b/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..25311fce4e3a79b389cbb750231c1277ccaf0611
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int8_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT8_MAX, INT8_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 1 "widening_mul" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_2.c b/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..f4a70a611176893e9fa55d8bc1826805ed5d966d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int16_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT16_MAX, INT16_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 1 "widening_mul" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_3.c b/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..61bcd30be2b47f482e8b3f0a024b2a1d51c4fda7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int32_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT32_MAX, INT32_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 1 "widening_mul" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_4.c b/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..6089716b0ca7498f9b8089f1b72d2968b1c2ee76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 4 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_5.c b/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..562dc5be861762272ea8d23b8304e1abb439e20f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef double elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 2 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_6.c b/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
new file mode 100644
index 0000000000000000000000000000000000000000..3e51c5e22a18a9a3acd2416c3ba72496c9621adf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fwrapv -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 4 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_8.c b/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
new file mode 100644
index 0000000000000000000000000000000000000000..6aa729c13d1616273d579077253d3fcdf55cc555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint8_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, 2, UINT8_MAX, 7, 0, UINT8_MAX, 5, 9 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 1 "widening_mul" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_9.c b/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
new file mode 100644
index 0000000000000000000000000000000000000000..d987a9936afb2cb4ba19e62736fa4ed171669e25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint16_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, 2, UINT16_MAX, 7, 0, UINT16_MAX, 5, 9 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 1 "widening_mul" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..32a36461fbc7bb78048ae68c8dc0bdd81b11a2cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int8_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT8_MAX, INT8_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd160dd0ebf515a3ff3ddd1969303aabf8c03aea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint8_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, 2, UINT8_MAX, 7, 0, UINT8_MAX, 5, 9 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f0198ce42600b0fe92bf483123ad1cb71ff9f24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint16_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, 2, UINT16_MAX, 7, 0, UINT16_MAX, 5, 9 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
new file mode 100644
index 0000000000000000000000000000000000000000..218afde13984fc64755d3c4567a05a33b5485411
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint32_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, 2, UINT32_MAX, 7, 0, UINT32_MAX, 5, 9 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
new file mode 100644
index 0000000000000000000000000000000000000000..1587628757e28f66dfd515e191ef04331c549434
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint64_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, 2, UINT64_MAX, 7, 0, UINT64_MAX, 5, 9 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..0f5918a9023521b06ac20ef922b025dc6a1e8f01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int16_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT16_MAX, INT16_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..92548cb6ec4fdc4a3d133669fb914c5ab9a103ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int32_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT32_MAX, INT32_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..6085a18bab7f2ae0e5855a982e186f831705bf52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int64_t elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1, -2, INT64_MAX, INT64_MIN, 5, -7, 3, -4 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..d61b91bb06dc0a035bd6adfabccc580eac7f78a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
new file mode 100644
index 0000000000000000000000000000000000000000..7598f7a28bcf1745ce672c0bab22fec0fda37a3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef double elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
new file mode 100644
index 0000000000000000000000000000000000000000..e1c337d44ead96d868d71f0ae54960f2189e499e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fwrapv -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
new file mode 100644
index 0000000000000000000000000000000000000000..13d962e2130f986910f1a94489e4014761e917b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fwrapv -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef double elem_t;
+
+__attribute__ ((noipa))  /* No IPA: main calls the real kernel.  */
+elem_t  /* Returns the elem_t sum of i * buf[i] over [0, len).  */
+foo2 (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+  for (int i = 0; i < len; i++)
+    x += (elem_t) i * buf[i];  /* Accumulate index * element.  */
+
+  return x;
+}
+
+static elem_t  /* foo2's computation with vectorization disabled.  */
+reference (elem_t *buf, int len)
+{
+  elem_t x = 0;
+
+#pragma GCC novector
+  for (int i = 0; i < len; i++)  /* Loop covered by the novector pragma.  */
+    x += (elem_t) i * buf[i];
+
+  return x;
+}
+
+int
+main (void)  /* Returns 0; aborts if foo2 and reference disagree.  */
+{
+  elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
+  int len = sizeof (buf) / sizeof (buf[0]);  /* Element count of buf.  */
+  elem_t want = reference (buf, len);
+  elem_t got = foo2 (buf, len);
+
+  if (want != got)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 4c3fb0f4fc5313199357d19ab809a7d8d88ed2d6..4b50a96ad3aa19857c5b8436ee8d6d3080d3c9ed 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3120,6 +3120,26 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
       if (is_gimple_debug (use_stmt))
 	continue;
 
+      /* If the use is a no-op type conversion, i.e. the operations are the
+	 same under two's complement, look through it to its single use.  */
+      tree lhs_type;
+      if (gimple_assign_cast_p (use_stmt)
+	  && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
+	  && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
+	{
+	  tree cast_lhs = gimple_get_lhs (use_stmt);
+	  gimple *tmp_use;
+	  use_operand_p tmp_use_p;
+	  if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
+	    {
+	      release_defs (use_stmt);
+	      use_stmt = tmp_use;
+	      result = cast_lhs;
+	      gsi_remove (&gsi, true);
+	      gsi = gsi_for_stmt (use_stmt);
+	    }
+	}
+
       if (is_gimple_assign (use_stmt)
 	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
 	{
@@ -3159,6 +3179,13 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
       if (seq)
 	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
 
+      /* Ensure all the operands are of the same type.  Use the type of the
+	 addend as that's the statement being replaced.  */
+      op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
+			    UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
+      mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
+			       UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
+
       if (len)
 	fma_stmt
 	  = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
@@ -3419,6 +3446,20 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
       if (is_gimple_debug (use_stmt))
 	continue;
 
+      /* If the use is a no-op type conversion, i.e. the operations are the
+	 same under two's complement, look through it to its single use.  */
+      tree lhs_type;
+      if (gimple_assign_cast_p (use_stmt)
+	  && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
+	  && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
+	{
+	  tree cast_lhs = gimple_get_lhs (use_stmt);
+	  gimple *tmp_use;
+	  use_operand_p tmp_use_p;
+	  if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
+	    use_stmt = tmp_use;
+	}
+
       /* For now restrict this operations to single basic blocks.  In theory
 	 we would want to support sinking the multiplication in
 	 m = a*b;
