Hi,
PR48183 is a case where ARM NEON intrinsics, under -O -g, produce debug
insns that try to expand OImode (32-byte integer) zero constants, which
are much too large to represent as two HOST_WIDE_INTs. As the internals
manual indicates, such large constants are not supported in general, and
the compiler ICEs on the
GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT assertion.

This patch allows the cases where the large integer constant is still
representable using a single CONST_INT, such as zero. Bootstrapped
and tested on i686 and x86_64, and cross-tested on ARM, all without
regressions. Okay for trunk?

Thanks,
Chung-Lin

2011-03-20  Chung-Lin Tang  <clt...@codesourcery.com>

        * emit-rtl.c (immed_double_const): Allow constants in modes
        wider than 2*HOST_BITS_PER_WIDE_INT when they are
        representable as a single const_int RTX.
Index: emit-rtl.c
===================================================================
--- emit-rtl.c  (revision 171074)
+++ emit-rtl.c  (working copy)
@@ -547,6 +547,13 @@
       if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
        return gen_int_mode (i0, mode);
 
+      /* For modes larger than 2 * HOST_BITS_PER_WIDE_INT, the integer may
+        still be representable if it fits in one word. For other cases,
+        assert fail below.  */
+      if (GET_MODE_BITSIZE (mode) > 2 * HOST_BITS_PER_WIDE_INT
+         && ((i1 == 0 && i0 >= 0) || (i1 == ~0 && i0 < 0)))
+       return GEN_INT (i0);
+
       gcc_assert (GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT);
     }
 
Index: testsuite/gcc.target/arm/pr48183.c
===================================================================
--- testsuite/gcc.target/arm/pr48183.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48183.c  (revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O -g" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n)
+{
+    unsigned i;
+    int16x4x2_t input;
+    int32x4x2_t mid;
+    int32x4x2_t output;
+
+    for (i = 0; i < n/2; i += 8) {
+        input = vld2_s16(src + i);
+        mid.val[0] = vmovl_s16(input.val[0]);
+        mid.val[1] = vmovl_s16(input.val[1]);
+        output.val[0] = vshlq_n_s32(mid.val[0], 8);
+        output.val[1] = vshlq_n_s32(mid.val[1], 8);
+        vst2q_s32((int32_t *)dst + i, output);
+    }
+}

Reply via email to