The patterns for the Neon vld and vst intrinsics used the following sort
of construct to refer to memory:
(mem:FOO (match_operand:SI X "register_operand" "r"))
This patch changes them to use:
(match_operand:FOO' X "neon_struct_operand" "(=)Um")
instead. This allows the loads to use post-increment addresses as well
as bare registers, and also matches the form that the vec_load_lanes
and vec_store_lanes optabs need. (Those optabs will be in a later
autovectorisation merge.)
The patch is a backport of:
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01996.html
which has been applied to 4.7. There are three differences in the
4.5 version:
* Our 4.5 code prints alignments as "[rN, :ALIGN]" rather than
"[rN:ALIGN]". I've fixed that here. The initial commit to FSF trunk
used the correct form, so there isn't a separate fix that could be
backported.
* 4.5 doesn't have MEM_REF, so neon_dereference_pointer uses an
INDIRECT_REF instead.
* 4.5 defines the mode attributes in neon.md rather than in a
separate iterators.md.
Richard
gcc/
Backport from mainline:
2011-04-12 Richard Sandiford <[email protected]>
* config/arm/arm.c (arm_print_operand): Use MEM_SIZE to get the
size of a '%A' memory reference.
(T_DREG, T_QREG): New neon_builtin_type_bits.
(arm_init_neon_builtins): Assert that the load and store operands
are neon_struct_operands.
(locate_neon_builtin_icode): Provide the neon_builtin_type_bits.
(NEON_ARG_MEMORY): New builtin_arg.
(neon_dereference_pointer): New function.
(arm_expand_neon_args): Add a neon_builtin_type_bits argument.
Handle NEON_ARG_MEMORY.
(arm_expand_neon_builtin): Update after above interface changes.
Use NEON_ARG_MEMORY for loads and stores.
* config/arm/predicates.md (neon_struct_operand): New predicate.
* config/arm/neon.md (V_two_elem): Tweak formatting.
(V_three_elem): Use BLKmode for accesses that have no associated mode.
(neon_vld1<mode>, neon_vld1_dup<mode>)
(neon_vst1_lane<mode>, neon_vst1<mode>, neon_vld2<mode>)
(neon_vld2_lane<mode>, neon_vld2_dup<mode>, neon_vst2<mode>)
(neon_vst2_lane<mode>, neon_vld3<mode>, neon_vld3_lane<mode>)
(neon_vld3_dup<mode>, neon_vst3<mode>, neon_vst3_lane<mode>)
(neon_vld4<mode>, neon_vld4_lane<mode>, neon_vld4_dup<mode>)
(neon_vst4<mode>): Replace pointer operand with a memory operand.
Use %A in the output template.
(neon_vld3qa<mode>, neon_vld3qb<mode>, neon_vst3qa<mode>)
(neon_vst3qb<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>)
(neon_vst4qa<mode>, neon_vst4qb<mode>): Likewise, but halve
the width of the memory access. Remove post-increment.
* config/arm/neon-testgen.ml: Allow addresses to have an alignment.
gcc/testsuite/
Backport from mainline:
2011-04-12 Richard Sandiford <[email protected]>
* gcc.target/arm/neon-vld3-1.c: New test.
* gcc.target/arm/neon-vst3-1.c: New test.
* gcc.target/arm/neon/v*.c: Regenerate.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c 2011-04-20 08:29:44.000000000 +0000
+++ gcc/config/arm/arm.c 2011-04-20 09:32:44.000000000 +0000
@@ -16847,7 +16847,7 @@ arm_print_operand (FILE *stream, rtx x,
{
rtx addr;
bool postinc = FALSE;
- unsigned align, modesize, align_bits;
+ unsigned align, memsize, align_bits;
gcc_assert (GET_CODE (x) == MEM);
addr = XEXP (x, 0);
@@ -16862,12 +16862,12 @@ arm_print_operand (FILE *stream, rtx x,
instruction (for some alignments) as an aid to the memory subsystem
of the target. */
align = MEM_ALIGN (x) >> 3;
- modesize = GET_MODE_SIZE (GET_MODE (x));
+ memsize = INTVAL (MEM_SIZE (x));
/* Only certain alignment specifiers are supported by the hardware. */
- if (modesize == 16 && (align % 32) == 0)
+ if (memsize == 16 && (align % 32) == 0)
align_bits = 256;
- else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
+ else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
align_bits = 128;
else if ((align % 8) == 0)
align_bits = 64;
@@ -16875,7 +16875,7 @@ arm_print_operand (FILE *stream, rtx x,
align_bits = 0;
if (align_bits != 0)
- asm_fprintf (stream, ", :%d", align_bits);
+ asm_fprintf (stream, ":%d", align_bits);
asm_fprintf (stream, "]");
@@ -18398,12 +18398,14 @@ enum neon_builtin_type_bits {
T_V2SI = 0x0004,
T_V2SF = 0x0008,
T_DI = 0x0010,
+ T_DREG = 0x001F,
T_V16QI = 0x0020,
T_V8HI = 0x0040,
T_V4SI = 0x0080,
T_V4SF = 0x0100,
T_V2DI = 0x0200,
T_TI = 0x0400,
+ T_QREG = 0x07E0,
T_EI = 0x0800,
T_OI = 0x1000
};
@@ -19049,10 +19051,9 @@ arm_init_neon_builtins (void)
if (is_load && k == 1)
{
/* Neon load patterns always have the memory operand
- (a SImode pointer) in the operand 1 position. We
- want a const pointer to the element type in that
- position. */
- gcc_assert (insn_data[icode].operand[k].mode == SImode);
+ in the operand 1 position. */
+ gcc_assert (insn_data[icode].operand[k].predicate
+ == neon_struct_operand);
switch (1 << j)
{
@@ -19087,10 +19088,9 @@ arm_init_neon_builtins (void)
else if (is_store && k == 0)
{
/* Similarly, Neon store patterns use operand 0 as
- the memory location to store to (a SImode pointer).
- Use a pointer to the element type of the store in
- that position. */
- gcc_assert (insn_data[icode].operand[k].mode == SImode);
+ the memory location to store to. */
+ gcc_assert (insn_data[icode].operand[k].predicate
+ == neon_struct_operand);
switch (1 << j)
{
@@ -19410,10 +19410,11 @@ neon_builtin_compare (const void *a, con
}
static enum insn_code
-locate_neon_builtin_icode (int fcode, neon_itype *itype)
+locate_neon_builtin_icode (int fcode, neon_itype *itype,
+ enum neon_builtin_type_bits *type_bit)
{
neon_builtin_datum key, *found;
- int idx;
+ int idx, type, ntypes;
key.base_fcode = fcode;
found = (neon_builtin_datum *)
@@ -19426,20 +19427,83 @@ locate_neon_builtin_icode (int fcode, ne
if (itype)
*itype = found->itype;
+ if (type_bit)
+ {
+ ntypes = 0;
+ for (type = 0; type < T_MAX; type++)
+ if (found->bits & (1 << type))
+ {
+ if (ntypes == idx)
+ break;
+ ntypes++;
+ }
+ gcc_assert (type < T_MAX);
+ *type_bit = (enum neon_builtin_type_bits) (1 << type);
+ }
return found->codes[idx];
}
typedef enum {
NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT,
+ NEON_ARG_MEMORY,
NEON_ARG_STOP
} builtin_arg;
#define NEON_MAX_BUILTIN_ARGS 5
+/* EXP is a pointer argument to a Neon load or store intrinsic. Derive
+ and return an expression for the accessed memory.
+
+ The intrinsic function operates on a block of registers that has
+ mode REG_MODE. This block contains vectors of type TYPE_BIT.
+ The function references the memory at EXP in mode MEM_MODE;
+ this mode may be BLKmode if no more suitable mode is available. */
+
+static tree
+neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
+ enum machine_mode reg_mode,
+ enum neon_builtin_type_bits type_bit)
+{
+ HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
+ tree elem_type, upper_bound, array_type;
+
+ /* Work out the size of the register block in bytes. */
+ reg_size = GET_MODE_SIZE (reg_mode);
+
+ /* Work out the size of each vector in bytes. */
+ gcc_assert (type_bit & (T_DREG | T_QREG));
+ vector_size = (type_bit & T_QREG ? 16 : 8);
+
+ /* Work out how many vectors there are. */
+ gcc_assert (reg_size % vector_size == 0);
+ nvectors = reg_size / vector_size;
+
+ /* Work out how many elements are being loaded or stored.
+ MEM_MODE == REG_MODE implies a one-to-one mapping between register
+ and memory elements; anything else implies a lane load or store. */
+ if (mem_mode == reg_mode)
+ nelems = vector_size * nvectors;
+ else
+ nelems = nvectors;
+
+ /* Work out the type of each element. */
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
+ elem_type = TREE_TYPE (TREE_TYPE (exp));
+
+ /* Create a type that describes the full access. */
+ upper_bound = build_int_cst (size_type_node, nelems - 1);
+ array_type = build_array_type (elem_type, build_index_type (upper_bound));
+
+ /* Dereference EXP using that type. */
+ exp = convert (build_pointer_type (array_type), exp);
+ return fold_build1 (INDIRECT_REF, array_type, exp);
+}
+
/* Expand a Neon builtin. */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
+ enum neon_builtin_type_bits type_bit,
tree exp, ...)
{
va_list ap;
@@ -19448,7 +19512,9 @@ arm_expand_neon_args (rtx target, int ic
rtx op[NEON_MAX_BUILTIN_ARGS];
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
+ enum machine_mode other_mode;
int argc = 0;
+ int opno;
if (have_retval
&& (!target
@@ -19466,26 +19532,46 @@ arm_expand_neon_args (rtx target, int ic
break;
else
{
+ opno = argc + have_retval;
+ mode[argc] = insn_data[icode].operand[opno].mode;
arg[argc] = CALL_EXPR_ARG (exp, argc);
+ if (thisarg == NEON_ARG_MEMORY)
+ {
+ other_mode = insn_data[icode].operand[1 - opno].mode;
+ arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
+ other_mode, type_bit);
+ }
op[argc] = expand_normal (arg[argc]);
- mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
switch (thisarg)
{
case NEON_ARG_COPY_TO_REG:
/*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
- if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+ if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
break;
case NEON_ARG_CONSTANT:
/* FIXME: This error message is somewhat unhelpful. */
- if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+ if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
error ("argument must be a constant");
break;
+ case NEON_ARG_MEMORY:
+ gcc_assert (MEM_P (op[argc]));
+ PUT_MODE (op[argc], mode[argc]);
+ /* ??? arm_neon.h uses the same built-in functions for signed
+ and unsigned accesses, casting where necessary. This isn't
+ alias safe. */
+ set_mem_alias_set (op[argc], 0);
+ if (!(*insn_data[icode].operand[opno].predicate)
+ (op[argc], mode[argc]))
+ op[argc] = (replace_equiv_address
+ (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
+ break;
+
case NEON_ARG_STOP:
gcc_unreachable ();
}
@@ -19564,14 +19650,15 @@ arm_expand_neon_args (rtx target, int ic
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
neon_itype itype;
- enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
+ enum neon_builtin_type_bits type_bit;
+ enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit);
switch (itype)
{
case NEON_UNOP:
case NEON_CONVERT:
case NEON_DUPLANE:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_BINOP:
@@ -19581,90 +19668,90 @@ arm_expand_neon_builtin (int fcode, tree
case NEON_SCALARMULH:
case NEON_SHIFTINSERT:
case NEON_LOGICBINOP:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_STOP);
case NEON_TERNOP:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_GETLANE:
case NEON_FIXCONV:
case NEON_SHIFTIMM:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
NEON_ARG_STOP);
case NEON_CREATE:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_DUP:
case NEON_SPLIT:
case NEON_REINTERP:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_COMBINE:
case NEON_VTBL:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_RESULTPAIR:
- return arm_expand_neon_args (target, icode, 0, exp,
+ return arm_expand_neon_args (target, icode, 0, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_STOP);
case NEON_LANEMUL:
case NEON_LANEMULL:
case NEON_LANEMULH:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_LANEMAC:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SHIFTACC:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SCALARMAC:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT, NEON_ARG_STOP);
case NEON_SELECT:
case NEON_VTBX:
- return arm_expand_neon_args (target, icode, 1, exp,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_STOP);
case NEON_LOAD1:
case NEON_LOADSTRUCT:
- return arm_expand_neon_args (target, icode, 1, exp,
- NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
+ NEON_ARG_MEMORY, NEON_ARG_STOP);
case NEON_LOAD1LANE:
case NEON_LOADSTRUCTLANE:
- return arm_expand_neon_args (target, icode, 1, exp,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ return arm_expand_neon_args (target, icode, 1, type_bit, exp,
+ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_STOP);
case NEON_STORE1:
case NEON_STORESTRUCT:
- return arm_expand_neon_args (target, icode, 0, exp,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+ return arm_expand_neon_args (target, icode, 0, type_bit, exp,
+ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
case NEON_STORE1LANE:
case NEON_STORESTRUCTLANE:
- return arm_expand_neon_args (target, icode, 0, exp,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ return arm_expand_neon_args (target, icode, 0, type_bit, exp,
+ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_STOP);
}
Index: gcc/config/arm/predicates.md
===================================================================
--- gcc/config/arm/predicates.md 2011-04-20 08:29:44.000000000 +0000
+++ gcc/config/arm/predicates.md 2011-04-20 08:29:52.000000000 +0000
@@ -681,3 +681,7 @@ (define_special_predicate "vect_par_cons
}
return true;
})
+
+(define_special_predicate "neon_struct_operand"
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
Index: gcc/config/arm/neon.md
===================================================================
--- gcc/config/arm/neon.md 2011-04-20 08:29:44.000000000 +0000
+++ gcc/config/arm/neon.md 2011-04-20 08:29:52.000000000 +0000
@@ -259,20 +259,18 @@ (define_mode_attr V_ext [(V8QI "SI") (V1
;; Mode of pair of elements for each vector mode, to define transfer
;; size for structure lane/dup loads and stores.
-(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
- (V4HI "SI") (V8HI "SI")
+(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
+ (V4HI "SI") (V8HI "SI")
(V2SI "V2SI") (V4SI "V2SI")
(V2SF "V2SF") (V4SF "V2SF")
(DI "V2DI") (V2DI "V2DI")])
;; Similar, for three elements.
-;; ??? Should we define extra modes so that sizes of all three-element
-;; accesses can be accurately represented?
-(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI")
- (V4HI "V4HI") (V8HI "V4HI")
- (V2SI "V4SI") (V4SI "V4SI")
- (V2SF "V4SF") (V4SF "V4SF")
- (DI "EI") (V2DI "EI")])
+(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
+ (V4HI "BLK") (V8HI "BLK")
+ (V2SI "BLK") (V4SI "BLK")
+ (V2SF "BLK") (V4SF "BLK")
+ (DI "EI") (V2DI "EI")])
;; Similar, for four elements.
(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
@@ -4567,16 +4565,16 @@ (define_expand "neon_vreinterpretv2di<mo
(define_insn "neon_vld1<mode>"
[(set (match_operand:VDQX 0 "s_register_operand" "=w")
- (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))]
+ (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
UNSPEC_VLD1))]
"TARGET_NEON"
- "vld1.<V_sz_elem>\t%h0, [%1]"
+ "vld1.<V_sz_elem>\t%h0, %A1"
[(set_attr "neon_type" "neon_vld1_1_2_regs")]
)
(define_insn "neon_vld1_lane<mode>"
[(set (match_operand:VDX 0 "s_register_operand" "=w")
- (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand"
"r"))
+ (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
(match_operand:VDX 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_VLD1_LANE))]
@@ -4587,9 +4585,9 @@ (define_insn "neon_vld1_lane<mode>"
if (lane < 0 || lane >= max)
error ("lane out of range");
if (max == 1)
- return "vld1.<V_sz_elem>\t%P0, [%1]";
+ return "vld1.<V_sz_elem>\t%P0, %A1";
else
- return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
+ return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
@@ -4599,7 +4597,7 @@ (define_insn "neon_vld1_lane<mode>"
(define_insn "neon_vld1_lane<mode>"
[(set (match_operand:VQX 0 "s_register_operand" "=w")
- (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand"
"r"))
+ (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
(match_operand:VQX 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_VLD1_LANE))]
@@ -4618,9 +4616,9 @@ (define_insn "neon_vld1_lane<mode>"
}
operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
if (max == 2)
- return "vld1.<V_sz_elem>\t%P0, [%1]";
+ return "vld1.<V_sz_elem>\t%P0, %A1";
else
- return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
+ return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
@@ -4630,14 +4628,14 @@ (define_insn "neon_vld1_lane<mode>"
(define_insn "neon_vld1_dup<mode>"
[(set (match_operand:VDX 0 "s_register_operand" "=w")
- (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand"
"r"))]
+ (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
UNSPEC_VLD1_DUP))]
"TARGET_NEON"
{
if (GET_MODE_NUNITS (<MODE>mode) > 1)
- return "vld1.<V_sz_elem>\t{%P0[]}, [%1]";
+ return "vld1.<V_sz_elem>\t{%P0[]}, %A1";
else
- return "vld1.<V_sz_elem>\t%h0, [%1]";
+ return "vld1.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -4647,14 +4645,14 @@ (define_insn "neon_vld1_dup<mode>"
(define_insn "neon_vld1_dup<mode>"
[(set (match_operand:VQX 0 "s_register_operand" "=w")
- (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand"
"r"))]
+ (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
UNSPEC_VLD1_DUP))]
"TARGET_NEON"
{
if (GET_MODE_NUNITS (<MODE>mode) > 2)
- return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
+ return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
else
- return "vld1.<V_sz_elem>\t%h0, [%1]";
+ return "vld1.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -4663,15 +4661,15 @@ (define_insn "neon_vld1_dup<mode>"
)
(define_insn "neon_vst1<mode>"
- [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
UNSPEC_VST1))]
"TARGET_NEON"
- "vst1.<V_sz_elem>\t%h1, [%0]"
+ "vst1.<V_sz_elem>\t%h1, %A0"
[(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
(define_insn "neon_vst1_lane<mode>"
- [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
(vec_select:<V_elem>
(match_operand:VDX 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
@@ -4682,9 +4680,9 @@ (define_insn "neon_vst1_lane<mode>"
if (lane < 0 || lane >= max)
error ("lane out of range");
if (max == 1)
- return "vst1.<V_sz_elem>\t{%P1}, [%0]";
+ return "vst1.<V_sz_elem>\t{%P1}, %A0";
else
- return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
+ return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1))
@@ -4692,7 +4690,7 @@ (define_insn "neon_vst1_lane<mode>"
(const_string "neon_vst1_vst2_lane")))])
(define_insn "neon_vst1_lane<mode>"
- [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
(vec_select:<V_elem>
(match_operand:VQX 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
@@ -4711,24 +4709,24 @@ (define_insn "neon_vst1_lane<mode>"
}
operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
if (max == 2)
- return "vst1.<V_sz_elem>\t{%P1}, [%0]";
+ return "vst1.<V_sz_elem>\t{%P1}, %A0";
else
- return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
+ return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
[(set_attr "neon_type" "neon_vst1_vst2_lane")]
)
(define_insn "neon_vld2<mode>"
[(set (match_operand:TI 0 "s_register_operand" "=w")
- (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON"
{
if (<V_sz_elem> == 64)
- return "vld1.64\t%h0, [%1]";
+ return "vld1.64\t%h0, %A1";
else
- return "vld2.<V_sz_elem>\t%h0, [%1]";
+ return "vld2.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4738,16 +4736,16 @@ (define_insn "neon_vld2<mode>"
(define_insn "neon_vld2<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
- (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON"
- "vld2.<V_sz_elem>\t%h0, [%1]"
+ "vld2.<V_sz_elem>\t%h0, %A1"
[(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")])
(define_insn "neon_vld2_lane<mode>"
[(set (match_operand:TI 0 "s_register_operand" "=w")
- (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand"
"r"))
+ (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(match_operand:TI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4764,7 +4762,7 @@ (define_insn "neon_vld2_lane<mode>"
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = operands[1];
ops[3] = operands[3];
- output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
+ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
return "";
}
[(set_attr "neon_type" "neon_vld1_vld2_lane")]
@@ -4772,7 +4770,7 @@ (define_insn "neon_vld2_lane<mode>"
(define_insn "neon_vld2_lane<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
- (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand"
"r"))
+ (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(match_operand:OI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
(unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4794,7 +4792,7 @@ (define_insn "neon_vld2_lane<mode>"
ops[1] = gen_rtx_REG (DImode, regno + 4);
ops[2] = operands[1];
ops[3] = GEN_INT (lane);
- output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
+ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
return "";
}
[(set_attr "neon_type" "neon_vld1_vld2_lane")]
@@ -4802,15 +4800,15 @@ (define_insn "neon_vld2_lane<mode>"
(define_insn "neon_vld2_dup<mode>"
[(set (match_operand:TI 0 "s_register_operand" "=w")
- (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand"
"r"))
+ (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_DUP))]
"TARGET_NEON"
{
if (GET_MODE_NUNITS (<MODE>mode) > 1)
- return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
+ return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
else
- return "vld1.<V_sz_elem>\t%h0, [%1]";
+ return "vld1.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -4819,16 +4817,16 @@ (define_insn "neon_vld2_dup<mode>"
)
(define_insn "neon_vst2<mode>"
- [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
(unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))]
"TARGET_NEON"
{
if (<V_sz_elem> == 64)
- return "vst1.64\t%h1, [%0]";
+ return "vst1.64\t%h1, %A0";
else
- return "vst2.<V_sz_elem>\t%h1, [%0]";
+ return "vst2.<V_sz_elem>\t%h1, %A0";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4837,17 +4835,17 @@ (define_insn "neon_vst2<mode>"
)
(define_insn "neon_vst2<mode>"
- [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))]
"TARGET_NEON"
- "vst2.<V_sz_elem>\t%h1, [%0]"
+ "vst2.<V_sz_elem>\t%h1, %A0"
[(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]
)
(define_insn "neon_vst2_lane<mode>"
- [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_two_elem>
[(match_operand:TI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
@@ -4865,14 +4863,14 @@ (define_insn "neon_vst2_lane<mode>"
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 2);
ops[3] = operands[2];
- output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
+ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
return "";
}
[(set_attr "neon_type" "neon_vst1_vst2_lane")]
)
(define_insn "neon_vst2_lane<mode>"
- [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_two_elem>
[(match_operand:OI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
@@ -4895,7 +4893,7 @@ (define_insn "neon_vst2_lane<mode>"
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = GEN_INT (lane);
- output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
+ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
return "";
}
[(set_attr "neon_type" "neon_vst1_vst2_lane")]
@@ -4903,15 +4901,15 @@ (define_insn "neon_vst2_lane<mode>"
(define_insn "neon_vld3<mode>"
[(set (match_operand:EI 0 "s_register_operand" "=w")
- (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3))]
"TARGET_NEON"
{
if (<V_sz_elem> == 64)
- return "vld1.64\t%h0, [%1]";
+ return "vld1.64\t%h0, %A1";
else
- return "vld3.<V_sz_elem>\t%h0, [%1]";
+ return "vld3.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4920,25 +4918,25 @@ (define_insn "neon_vld3<mode>"
)
(define_expand "neon_vld3<mode>"
- [(match_operand:CI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "s_register_operand" "+r")
+ [(match_operand:CI 0 "s_register_operand")
+ (match_operand:CI 1 "neon_struct_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
- emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[1], operands[1]));
- emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0],
- operands[1], operands[1]));
+ rtx mem;
+
+ mem = adjust_address (operands[1], EImode, 0);
+ emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
+ mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
+ emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
DONE;
})
(define_insn "neon_vld3qa<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
- (unspec:CI [(mem:CI (match_operand:SI 2 "s_register_operand" "1"))
+ (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VLD3A))
- (set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_dup 2)
- (const_int 24)))]
+ UNSPEC_VLD3A))]
"TARGET_NEON"
{
int regno = REGNO (operands[0]);
@@ -4947,7 +4945,7 @@ (define_insn "neon_vld3qa<mode>"
ops[1] = gen_rtx_REG (DImode, regno + 4);
ops[2] = gen_rtx_REG (DImode, regno + 8);
ops[3] = operands[1];
- output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
return "";
}
[(set_attr "neon_type" "neon_vld3_vld4")]
@@ -4955,13 +4953,10 @@ (define_insn "neon_vld3qa<mode>"
(define_insn "neon_vld3qb<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
- (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2"))
- (match_operand:CI 1 "s_register_operand" "0")
+ (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
+ (match_operand:CI 2 "s_register_operand" "0")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VLD3B))
- (set (match_operand:SI 2 "s_register_operand" "=r")
- (plus:SI (match_dup 3)
- (const_int 24)))]
+ UNSPEC_VLD3B))]
"TARGET_NEON"
{
int regno = REGNO (operands[0]);
@@ -4969,8 +4964,8 @@ (define_insn "neon_vld3qb<mode>"
ops[0] = gen_rtx_REG (DImode, regno + 2);
ops[1] = gen_rtx_REG (DImode, regno + 6);
ops[2] = gen_rtx_REG (DImode, regno + 10);
- ops[3] = operands[2];
- output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
+ ops[3] = operands[1];
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
return "";
}
[(set_attr "neon_type" "neon_vld3_vld4")]
@@ -4978,7 +4973,7 @@ (define_insn "neon_vld3qb<mode>"
(define_insn "neon_vld3_lane<mode>"
[(set (match_operand:EI 0 "s_register_operand" "=w")
- (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1
"s_register_operand" "r"))
+ (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(match_operand:EI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4996,7 +4991,7 @@ (define_insn "neon_vld3_lane<mode>"
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = operands[1];
ops[4] = operands[3];
- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3",
ops);
return "";
}
@@ -5005,7 +5000,7 @@ (define_insn "neon_vld3_lane<mode>"
(define_insn "neon_vld3_lane<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
- (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1
"s_register_operand" "r"))
+ (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(match_operand:CI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
(unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -5028,7 +5023,7 @@ (define_insn "neon_vld3_lane<mode>"
ops[2] = gen_rtx_REG (DImode, regno + 8);
ops[3] = operands[1];
ops[4] = GEN_INT (lane);
- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3",
ops);
return "";
}
@@ -5037,7 +5032,7 @@ (define_insn "neon_vld3_lane<mode>"
(define_insn "neon_vld3_dup<mode>"
[(set (match_operand:EI 0 "s_register_operand" "=w")
- (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1
"s_register_operand" "r"))
+ (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3_DUP))]
"TARGET_NEON"
@@ -5050,11 +5045,11 @@ (define_insn "neon_vld3_dup<mode>"
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = operands[1];
- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops);
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops);
return "";
}
else
- return "vld1.<V_sz_elem>\t%h0, [%1]";
+ return "vld1.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -5062,16 +5057,16 @@ (define_insn "neon_vld3_dup<mode>"
(const_string "neon_vld1_1_2_regs")))])
(define_insn "neon_vst3<mode>"
- [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3))]
"TARGET_NEON"
{
if (<V_sz_elem> == 64)
- return "vst1.64\t%h1, [%0]";
+ return "vst1.64\t%h1, %A0";
else
- return "vst3.<V_sz_elem>\t%h1, [%0]";
+ return "vst3.<V_sz_elem>\t%h1, %A0";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -5079,62 +5074,60 @@ (define_insn "neon_vst3<mode>"
(const_string "neon_vst2_4_regs_vst3_vst4")))])
(define_expand "neon_vst3<mode>"
- [(match_operand:SI 0 "s_register_operand" "+r")
- (match_operand:CI 1 "s_register_operand" "w")
+ [(match_operand:CI 0 "neon_struct_operand")
+ (match_operand:CI 1 "s_register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
- emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1]));
- emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1]));
+ rtx mem;
+
+ mem = adjust_address (operands[0], EImode, 0);
+ emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
+ mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
+ emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
DONE;
})
(define_insn "neon_vst3qa<mode>"
- [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
- (unspec:EI [(match_operand:CI 2 "s_register_operand" "w")
+ [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
+ (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VST3A))
- (set (match_operand:SI 0 "s_register_operand" "=r")
- (plus:SI (match_dup 1)
- (const_int 24)))]
+ UNSPEC_VST3A))]
"TARGET_NEON"
{
- int regno = REGNO (operands[2]);
+ int regno = REGNO (operands[1]);
rtx ops[4];
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = gen_rtx_REG (DImode, regno + 8);
- output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
return "";
}
[(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
)
(define_insn "neon_vst3qb<mode>"
- [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
- (unspec:EI [(match_operand:CI 2 "s_register_operand" "w")
+ [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
+ (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VST3B))
- (set (match_operand:SI 0 "s_register_operand" "=r")
- (plus:SI (match_dup 1)
- (const_int 24)))]
+ UNSPEC_VST3B))]
"TARGET_NEON"
{
- int regno = REGNO (operands[2]);
+ int regno = REGNO (operands[1]);
rtx ops[4];
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = gen_rtx_REG (DImode, regno + 6);
ops[3] = gen_rtx_REG (DImode, regno + 10);
- output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
return "";
}
[(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
)
(define_insn "neon_vst3_lane<mode>"
- [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_three_elem>
[(match_operand:EI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
@@ -5153,7 +5146,7 @@ (define_insn "neon_vst3_lane<mode>"
ops[2] = gen_rtx_REG (DImode, regno + 2);
ops[3] = gen_rtx_REG (DImode, regno + 4);
ops[4] = operands[2];
- output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0",
ops);
return "";
}
@@ -5161,7 +5154,7 @@ (define_insn "neon_vst3_lane<mode>"
)
(define_insn "neon_vst3_lane<mode>"
- [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_three_elem>
[(match_operand:CI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
@@ -5185,7 +5178,7 @@ (define_insn "neon_vst3_lane<mode>"
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = gen_rtx_REG (DImode, regno + 8);
ops[4] = GEN_INT (lane);
- output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0",
ops);
return "";
}
@@ -5193,15 +5186,15 @@ (define_insn "neon_vst3_lane<mode>"
(define_insn "neon_vld4<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
- (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4))]
"TARGET_NEON"
{
if (<V_sz_elem> == 64)
- return "vld1.64\t%h0, [%1]";
+ return "vld1.64\t%h0, %A1";
else
- return "vld4.<V_sz_elem>\t%h0, [%1]";
+ return "vld4.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -5210,25 +5203,25 @@ (define_insn "neon_vld4<mode>"
)
(define_expand "neon_vld4<mode>"
- [(match_operand:XI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "s_register_operand" "+r")
+ [(match_operand:XI 0 "s_register_operand")
+ (match_operand:XI 1 "neon_struct_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
- emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[1], operands[1]));
- emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0],
- operands[1], operands[1]));
+ rtx mem;
+
+ mem = adjust_address (operands[1], OImode, 0);
+ emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
+ mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
+ emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
DONE;
})
(define_insn "neon_vld4qa<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
- (unspec:XI [(mem:XI (match_operand:SI 2 "s_register_operand" "1"))
+ (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VLD4A))
- (set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_dup 2)
- (const_int 32)))]
+ UNSPEC_VLD4A))]
"TARGET_NEON"
{
int regno = REGNO (operands[0]);
@@ -5238,7 +5231,7 @@ (define_insn "neon_vld4qa<mode>"
ops[2] = gen_rtx_REG (DImode, regno + 8);
ops[3] = gen_rtx_REG (DImode, regno + 12);
ops[4] = operands[1];
- output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
return "";
}
[(set_attr "neon_type" "neon_vld3_vld4")]
@@ -5246,13 +5239,10 @@ (define_insn "neon_vld4qa<mode>"
(define_insn "neon_vld4qb<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
- (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2"))
- (match_operand:XI 1 "s_register_operand" "0")
+ (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
+ (match_operand:XI 2 "s_register_operand" "0")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VLD4B))
- (set (match_operand:SI 2 "s_register_operand" "=r")
- (plus:SI (match_dup 3)
- (const_int 32)))]
+ UNSPEC_VLD4B))]
"TARGET_NEON"
{
int regno = REGNO (operands[0]);
@@ -5261,8 +5251,8 @@ (define_insn "neon_vld4qb<mode>"
ops[1] = gen_rtx_REG (DImode, regno + 6);
ops[2] = gen_rtx_REG (DImode, regno + 10);
ops[3] = gen_rtx_REG (DImode, regno + 14);
- ops[4] = operands[2];
- output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
+ ops[4] = operands[1];
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
return "";
}
[(set_attr "neon_type" "neon_vld3_vld4")]
@@ -5270,7 +5260,7 @@ (define_insn "neon_vld4qb<mode>"
(define_insn "neon_vld4_lane<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
- (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1
"s_register_operand" "r"))
+ (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(match_operand:OI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -5289,7 +5279,7 @@ (define_insn "neon_vld4_lane<mode>"
ops[3] = gen_rtx_REG (DImode, regno + 6);
ops[4] = operands[1];
ops[5] = operands[3];
- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5],
%P3[%c5]}, [%4]",
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5],
%P3[%c5]}, %A4",
ops);
return "";
}
@@ -5298,7 +5288,7 @@ (define_insn "neon_vld4_lane<mode>"
(define_insn "neon_vld4_lane<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
- (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1
"s_register_operand" "r"))
+ (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(match_operand:XI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
(unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -5322,7 +5312,7 @@ (define_insn "neon_vld4_lane<mode>"
ops[3] = gen_rtx_REG (DImode, regno + 12);
ops[4] = operands[1];
ops[5] = GEN_INT (lane);
- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5],
%P3[%c5]}, [%4]",
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5],
%P3[%c5]}, %A4",
ops);
return "";
}
@@ -5331,7 +5321,7 @@ (define_insn "neon_vld4_lane<mode>"
(define_insn "neon_vld4_dup<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
- (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1
"s_register_operand" "r"))
+ (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4_DUP))]
"TARGET_NEON"
@@ -5345,12 +5335,12 @@ (define_insn "neon_vld4_dup<mode>"
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = gen_rtx_REG (DImode, regno + 6);
ops[4] = operands[1];
- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]",
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
ops);
return "";
}
else
- return "vld1.<V_sz_elem>\t%h0, [%1]";
+ return "vld1.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
(if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -5359,16 +5349,16 @@ (define_insn "neon_vld4_dup<mode>"
)
(define_insn "neon_vst4<mode>"
- [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4))]
"TARGET_NEON"
{
if (<V_sz_elem> == 64)
- return "vst1.64\t%h1, [%0]";
+ return "vst1.64\t%h1, %A0";
else
- return "vst4.<V_sz_elem>\t%h1, [%0]";
+ return "vst4.<V_sz_elem>\t%h1, %A0";
}
[(set (attr "neon_type")
(if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -5377,64 +5367,62 @@ (define_insn "neon_vst4<mode>"
)
(define_expand "neon_vst4<mode>"
- [(match_operand:SI 0 "s_register_operand" "+r")
- (match_operand:XI 1 "s_register_operand" "w")
+ [(match_operand:XI 0 "neon_struct_operand")
+ (match_operand:XI 1 "s_register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
- emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1]));
- emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1]));
+ rtx mem;
+
+ mem = adjust_address (operands[0], OImode, 0);
+ emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
+ mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
+ emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
DONE;
})
(define_insn "neon_vst4qa<mode>"
- [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
- (unspec:OI [(match_operand:XI 2 "s_register_operand" "w")
+ [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
+ (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VST4A))
- (set (match_operand:SI 0 "s_register_operand" "=r")
- (plus:SI (match_dup 1)
- (const_int 32)))]
+ UNSPEC_VST4A))]
"TARGET_NEON"
{
- int regno = REGNO (operands[2]);
+ int regno = REGNO (operands[1]);
rtx ops[5];
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = gen_rtx_REG (DImode, regno + 8);
ops[4] = gen_rtx_REG (DImode, regno + 12);
- output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
return "";
}
[(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
)
(define_insn "neon_vst4qb<mode>"
- [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
- (unspec:OI [(match_operand:XI 2 "s_register_operand" "w")
+ [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
+ (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- UNSPEC_VST4B))
- (set (match_operand:SI 0 "s_register_operand" "=r")
- (plus:SI (match_dup 1)
- (const_int 32)))]
+ UNSPEC_VST4B))]
"TARGET_NEON"
{
- int regno = REGNO (operands[2]);
+ int regno = REGNO (operands[1]);
rtx ops[5];
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = gen_rtx_REG (DImode, regno + 6);
ops[3] = gen_rtx_REG (DImode, regno + 10);
ops[4] = gen_rtx_REG (DImode, regno + 14);
- output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
return "";
}
[(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
)
(define_insn "neon_vst4_lane<mode>"
- [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_four_elem>
[(match_operand:OI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
@@ -5454,7 +5442,7 @@ (define_insn "neon_vst4_lane<mode>"
ops[3] = gen_rtx_REG (DImode, regno + 4);
ops[4] = gen_rtx_REG (DImode, regno + 6);
ops[5] = operands[2];
- output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5],
%P4[%c5]}, [%0]",
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5],
%P4[%c5]}, %A0",
ops);
return "";
}
@@ -5462,7 +5450,7 @@ (define_insn "neon_vst4_lane<mode>"
)
(define_insn "neon_vst4_lane<mode>"
- [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_four_elem>
[(match_operand:XI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
@@ -5487,7 +5475,7 @@ (define_insn "neon_vst4_lane<mode>"
ops[3] = gen_rtx_REG (DImode, regno + 8);
ops[4] = gen_rtx_REG (DImode, regno + 12);
ops[5] = GEN_INT (lane);
- output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5],
%P4[%c5]}, [%0]",
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5],
%P4[%c5]}, %A0",
ops);
return "";
}
Index: gcc/config/arm/neon-testgen.ml
===================================================================
--- gcc/config/arm/neon-testgen.ml 2011-04-20 08:29:44.000000000 +0000
+++ gcc/config/arm/neon-testgen.ml 2011-04-20 08:29:52.000000000 +0000
@@ -177,7 +177,7 @@ let rec analyze_shape shape =
let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in
"\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}"
| (PtrTo elt | CstPtrTo elt) ->
- "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]"
+ "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]"
| Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
| Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
| All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]"
Index: gcc/testsuite/gcc.target/arm/neon-vld3-1.c
===================================================================
--- /dev/null 2010-10-05 15:55:33.000000000 +0000
+++ gcc/testsuite/gcc.target/arm/neon-vld3-1.c 2011-04-20 08:29:52.000000000
+0000
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+uint32_t buffer[12];
+
+void __attribute__((noinline))
+foo (uint32_t *a)
+{
+ uint32x4x3_t x;
+
+ x = vld3q_u32 (a);
+ x.val[0] = vaddq_u32 (x.val[0], x.val[1]);
+ vst3q_u32 (a, x);
+}
+
+int
+main (void)
+{
+ buffer[0] = 1;
+ buffer[1] = 2;
+ foo (buffer);
+ return buffer[0] != 3;
+}
Index: gcc/testsuite/gcc.target/arm/neon-vst3-1.c
===================================================================
--- /dev/null 2010-10-05 15:55:33.000000000 +0000
+++ gcc/testsuite/gcc.target/arm/neon-vst3-1.c 2011-04-20 08:29:52.000000000
+0000
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+uint32_t buffer[64];
+
+void __attribute__((noinline))
+foo (uint32_t *a)
+{
+ uint32x4x3_t x;
+
+ x = vld3q_u32 (a);
+ a[35] = 1;
+ vst3q_lane_u32 (a + 32, x, 1);
+}
+
+int
+main (void)
+{
+ foo (buffer);
+ return buffer[35] != 1;
+}
Index: gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c
===================================================================
--- gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-04-20
08:29:45.000000000 +0000
+++ gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-04-20
08:29:52.000000000 +0000
@@ -15,5 +15,5 @@ void test_vld1Q_dupf32 (void)
out_float32x4_t = vld1q_dup_f32 (0);
}
-/* { dg-final { scan-assembler "vld1\.32\[
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\],
\[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@.*\)?\n" }
} */
+/* { dg-final { scan-assembler "vld1\.32\[
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\],
\[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[
\]+@.*\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
[...etc...]
_______________________________________________
linaro-toolchain mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/linaro-toolchain