Hi,
While testing my NEON intrinsics work with some testcases that I was writing up, I ran into PR54051.

The one change which is probably a bit long-standing is the fact that for register-only addressing modes, i.e. something like mem (reg:SI), we were printing out addresses with an immediate of #0. Historically the reason for this appears to be to deal with an old assembler bug where the assembler sometimes couldn't properly distinguish between the auto-inc addressing forms and the register-indirect addressing form; I'm informed that bug is now fixed.

This patch has gone through a full test run with qemu in a cross environment with no regressions for armv7-a / neon / arm / thumb with a v5t multilib for C and C++.

I intend to backport this to 4.7, as this is a regression compared to 4.6. Unless there is an objection from anyone, I will do so after letting it be on trunk for a few days to see if the auto-testers pick anything else up.

regards
Ramana

	PR target/54051
	* config/arm/arm.c (arm_print_operand_address): Remove superfluous printing of #0.
	* config/arm/neon.md ("neon_vld3_lane<mode>":VD): Remove alignment specifier.
	("neon_vld3_lane<mode>":VMQ): Likewise.
	("neon_vld3_dup<mode>":VDX): Likewise.
	("neon_vst3_lane<mode>":VD): Likewise.
	("neon_vst3_lane<mode>":VMQ): Likewise.

	PR target/54051
	* gcc.target/arm/pr54051.c: New.
	* gcc.target/arm/vfp-1.c: Adjust test.
Index: gcc/testsuite/gcc.target/arm/pr54051.c =================================================================== --- gcc/testsuite/gcc.target/arm/pr54051.c (revision 0) +++ gcc/testsuite/gcc.target/arm/pr54051.c (revision 189808) @@ -0,0 +1,20 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +int32_t a __attribute__ ((aligned (64))); + +int32x2x3_t test (void) +{ + return vld3_dup_s32 (&a); +} + +int32x2x3_t test1 (void) +{ + int32x2x3_t res ; + return vld3_lane_s32 (&a, res, 1); +} + Index: gcc/testsuite/gcc.target/arm/vfp-1.c =================================================================== --- gcc/testsuite/gcc.target/arm/vfp-1.c (revision 189807) +++ gcc/testsuite/gcc.target/arm/vfp-1.c (revision 189808) @@ -129,7 +129,7 @@ /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! 
arm_thumb2_ok } } } } } */ /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ - /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */ + /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\]\\\]\n" } } */ f[256] = f[255] + f[-255]; /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 189807) +++ gcc/config/arm/arm.c (revision 189808) @@ -17420,7 +17420,7 @@ int is_minus = GET_CODE (x) == MINUS; if (GET_CODE (x) == REG) - asm_fprintf (stream, "[%r, #0]", REGNO (x)); + asm_fprintf (stream, "[%r]", REGNO (x)); else if (GET_CODE (x) == PLUS || is_minus) { rtx base = XEXP (x, 0); Index: gcc/config/arm/neon.md =================================================================== --- gcc/config/arm/neon.md (revision 189807) +++ gcc/config/arm/neon.md (revision 189808) @@ -4806,7 +4806,7 @@ ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; ops[4] = operands[3]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", ops); return ""; } @@ -4838,7 +4838,7 @@ ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = operands[1]; ops[4] = GEN_INT (lane); - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", ops); return ""; } @@ -4860,7 +4860,7 @@ ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; - output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops); + output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops); return ""; } else @@ -4978,7 +4978,7 @@ ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = gen_rtx_REG (DImode, regno + 4); ops[4] = operands[2]; - output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", + 
output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", ops); return ""; } @@ -5010,7 +5010,7 @@ ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 8); ops[4] = GEN_INT (lane); - output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", + output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", ops); return ""; }