Hi,
PR48183 is a case where ARM NEON intrinsics, under -O -g, produce debug
insns that try to expand OImode (32-byte integer) zero constants, which are
much too large to represent as two HOST_WIDE_INTs. As the internals manual
indicates, such large constants are not supported in general, so the
compiler ICEs on the GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT
assertion in immed_double_const.
This patch allows the cases where the large integer constant is still
representable using a single CONST_INT (i.e. the high word is just the
sign extension of the low word), such as zero. Bootstrapped
and tested on i686 and x86_64, cross-tested on ARM, all without
regressions. Okay for trunk?
Thanks,
Chung-Lin
2011-03-20 Chung-Lin Tang <[email protected]>
* emit-rtl.c (immed_double_const): Allow wider than
2*HOST_BITS_PER_WIDE_INT mode constants when they are
representable as a single const_int RTX.
Index: emit-rtl.c
===================================================================
--- emit-rtl.c (revision 171074)
+++ emit-rtl.c (working copy)
@@ -547,6 +547,13 @@
if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
return gen_int_mode (i0, mode);
+ /* For modes larger than 2 * HOST_BITS_PER_WIDE_INT, the integer may
+ still be representable if it fits in one word. For other cases,
+ assert fail below. */
+ if (GET_MODE_BITSIZE (mode) > 2 * HOST_BITS_PER_WIDE_INT
+ && ((i1 == 0 && i0 >= 0) || (i1 == ~0 && i0 < 0)))
+ return GEN_INT (i0);
+
gcc_assert (GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT);
}
Index: testsuite/gcc.target/arm/pr48183.c
===================================================================
--- testsuite/gcc.target/arm/pr48183.c (revision 0)
+++ testsuite/gcc.target/arm/pr48183.c (revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O -g" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n)
+{
+ unsigned i;
+ int16x4x2_t input;
+ int32x4x2_t mid;
+ int32x4x2_t output;
+
+ for (i = 0; i < n/2; i += 8) {
+ input = vld2_s16(src + i);
+ mid.val[0] = vmovl_s16(input.val[0]);
+ mid.val[1] = vmovl_s16(input.val[1]);
+ output.val[0] = vshlq_n_s32(mid.val[0], 8);
+ output.val[1] = vshlq_n_s32(mid.val[1], 8);
+ vst2q_s32((int32_t *)dst + i, output);
+ }
+}