Semantically the neon intrinsic vgetq_lane_[su]64 returns a 64 bit
sub-object of a 128-bit vector; there's no real need for the intrinsic
to map onto a specific machine instruction.
Indeed, if force a particular instruction that moves the result into a
core register, but then want to use the result in the vector unit, we
don't really want to have to move the result back to the other register
bank. However, that's what we do today.
This patch changes the way we expand these operations so that we
no-longer force selection of the get-lane operation.
A side effect of this change is that we now spit out the fmrrd mnemonic
rather than the vmov equivalent. As a consequence I've updated the
testsuite to allow for this change. The changes to the ML files are
pretty mechanical, but I don't speak ML so it would be helpful if
another pair of eyes could check that bit over and tell me if I've
missed something subtle.
Tested on trunk and gcc-4.7, but only installed on trunk.
R.
* neon.md (neon_vget_lanev2di): Use gen_lowpart and gen_highpart.
* config/arm/neon.ml (Fixed_return_reg): Renamed to fixed_vector_reg.
All callers changed.
(Fixed_core_reg): New feature.
(Vget_lane [sizes S64 and U64]): Add Fixed_core_reg. Allow
fmrrd in disassembly.
* neon-testgen.ml: Handle Fixed_core_reg.
testsuite/
* gcc.target/arm/neon/vgetQ_laneu64.c: Regenerated.
* gcc.target/arm/neon/vgetQ_lanes64.c: Likewise.
--- config/arm/neon-testgen.ml (revision 185587)
+++ config/arm/neon-testgen.ml (local)
@@ -79,9 +79,12 @@ let emit_automatics chan c_types feature
(* The intrinsic returns a value. We need to do explict register
allocation for vget_low tests or they fail because of copy
elimination. *)
- ((if List.mem Fixed_return_reg features then
+ ((if List.mem Fixed_vector_reg features then
Printf.fprintf chan " register %s out_%s asm (\"d18\");\n"
return_ty return_ty
+ else if List.mem Fixed_core_reg features then
+ Printf.fprintf chan " register %s out_%s asm (\"r0\");\n"
+ return_ty return_ty
else
Printf.fprintf chan " %s out_%s;\n" return_ty return_ty);
emit ())
--- config/arm/neon.md (revision 185587)
+++ config/arm/neon.md (local)
@@ -2720,14 +2720,24 @@ (define_expand "neon_vget_lanedi"
})
(define_expand "neon_vget_lanev2di"
- [(match_operand:DI 0 "s_register_operand" "=r")
- (match_operand:V2DI 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")
- (match_operand:SI 3 "immediate_operand" "i")]
+ [(match_operand:DI 0 "s_register_operand" "")
+ (match_operand:V2DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, 2);
- emit_insn (gen_vec_extractv2di (operands[0], operands[1], operands[2]));
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
+ break;
+ case 1:
+ emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
+ break;
+ default:
+ neon_lane_bounds (operands[2], 0, 1);
+ FAIL;
+ }
DONE;
})
--- config/arm/neon.ml (revision 185587)
+++ config/arm/neon.ml (local)
@@ -234,7 +234,8 @@ type features =
cases. The function supplied must return the integer to be written
into the testcase for the argument number (0-based) supplied to it. *)
| Const_valuator of (int -> int)
- | Fixed_return_reg
+ | Fixed_vector_reg
+ | Fixed_core_reg
exception MixedMode of elts * elts
@@ -1009,7 +1010,8 @@ let ops =
Vget_lane,
[InfoWord;
Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
- Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
+ Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0);
+ Fixed_core_reg],
Use_operands [| Corereg; Qreg; Immed |],
"vgetQ_lane", notype_2, [S64; U64];
@@ -1125,7 +1127,7 @@ let ops =
notype_1, pf_su_8_64;
Vget_low, [Instruction_name ["vmov"];
Disassembles_as [Use_operands [| Dreg; Dreg |]];
- Fixed_return_reg],
+ Fixed_vector_reg],
Use_operands [| Dreg; Qreg |], "vget_low",
notype_1, pf_su_8_32;
Vget_low, [No_op],
--- testsuite/gcc.target/arm/neon/vgetQ_lanes64.c (revision 185587)
+++ testsuite/gcc.target/arm/neon/vgetQ_lanes64.c (local)
@@ -10,11 +10,11 @@
void test_vgetQ_lanes64 (void)
{
- int64_t out_int64_t;
+ register int64_t out_int64_t asm ("r0");
int64x2_t arg0_int64x2_t;
out_int64_t = vgetq_lane_s64 (arg0_int64x2_t, 0);
}
-/* { dg-final { scan-assembler "vmov\[ \]+\[rR\]\[0-9\]+,
\[rR\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "((vmov)|(fmrrd))\[ \]+\[rR\]\[0-9\]+,
\[rR\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
--- testsuite/gcc.target/arm/neon/vgetQ_laneu64.c (revision 185587)
+++ testsuite/gcc.target/arm/neon/vgetQ_laneu64.c (local)
@@ -10,11 +10,11 @@
void test_vgetQ_laneu64 (void)
{
- uint64_t out_uint64_t;
+ register uint64_t out_uint64_t asm ("r0");
uint64x2_t arg0_uint64x2_t;
out_uint64_t = vgetq_lane_u64 (arg0_uint64x2_t, 0);
}
-/* { dg-final { scan-assembler "vmov\[ \]+\[rR\]\[0-9\]+,
\[rR\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "((vmov)|(fmrrd))\[ \]+\[rR\]\[0-9\]+,
\[rR\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */