Hi all,
On some of the saturating math expanders we need to perform a lane flip
on big-endian when expanding to RTL so that we keep consistent with
GCCs' view of lane numbering.
During assembly emission the pattern will perform another lane flip to
translate from GCCs' numbering to the architectural lane number.
To do this a few of the patterns were renamed to *_internal and given an
expander that will perform that first lane flip while the existing
expanders get a lane flip added to them.
The tests for these patterns will come soon in a separate patch.
With this patch, when the user uses something like vqdmlal_laneq_s16 (a,
b, c, 0) from arm_neon.h in big endian the resulting instruction will
access lane 0 of c now, whereas before it would access lane 7.
Tested and bootstrapped aarch64-none-linux-gnu and aarch64_be-none-elf.
Ok for trunk?
Thanks,
Kyrill
2014-06-10 Kyrylo Tkachov <kyrylo.tkac...@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_sqdmulh_lane<mode>):
New expander.
(aarch64_sqrdmulh_lane<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>): Rename to...
(aarch64_sq<r>dmulh_lane<mode>_interna): ...this.
(aarch64_sqdmulh_laneq<mode>): New expander.
(aarch64_sqrdmulh_laneq<mode>): Likewise.
(aarch64_sq<r>dmulh_laneq<mode>): Rename to...
(aarch64_sq<r>dmulh_laneq<mode>_internal): ...this.
(aarch64_sqdmulh_lane<mode>): New expander.
(aarch64_sqrdmulh_lane<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>): Rename to...
(aarch64_sq<r>dmulh_lane<mode>_internal): ...this.
(aarch64_sqdmlal_lane<mode>): Add lane flip for big-endian.
(aarch64_sqdmlal_laneq<mode>): Likewise.
(aarch64_sqdmlsl_lane<mode>): Likewise.
(aarch64_sqdmlsl_laneq<mode>): Likewise.
(aarch64_sqdmlal2_lane<mode>): Likewise.
(aarch64_sqdmlal2_laneq<mode>): Likewise.
(aarch64_sqdmlsl2_lane<mode>): Likewise.
(aarch64_sqdmlsl2_laneq<mode>): Likewise.
(aarch64_sqdmull_lane<mode>): Likewise.
(aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>): Likewise.
(aarch64_sqdmull2_laneq<mode>): Likewise.
commit 18ed07903bb21e7dea185a1618a130cd88ed9de7
Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
Date: Tue Jun 3 15:27:09 2014 +0100
[AArch64] Saturating math lane fixes
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 108bc8d..fc028f5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2650,7 +2650,41 @@
;; sq<r>dmulh_lane
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_expand "aarch64_sqdmulh_lane<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_lane<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
@@ -2666,7 +2700,41 @@
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+(define_expand "aarch64_sqdmulh_laneq<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_laneq<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_laneq<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_laneq<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
@@ -2676,13 +2744,46 @@
VQDMULH))]
"TARGET_SIMD"
"*
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_expand "aarch64_sqdmulh_lane<mode>"
+ [(match_operand:SD_HSI 0 "register_operand" "")
+ (match_operand:SD_HSI 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_lane<mode>"
+ [(match_operand:SD_HSI 0 "register_operand" "")
+ (match_operand:SD_HSI 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "w")
@@ -2692,7 +2793,6 @@
VQDMULH))]
"TARGET_SIMD"
"*
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
@@ -2774,6 +2874,7 @@
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2789,6 +2890,7 @@
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2804,6 +2906,7 @@
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2819,6 +2922,7 @@
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2930,6 +3034,7 @@
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2946,6 +3051,7 @@
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2962,6 +3068,7 @@
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2978,6 +3085,7 @@
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -3098,6 +3206,7 @@
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3]));
DONE;
@@ -3111,6 +3220,7 @@
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
(operands[0], operands[1], operands[2], operands[3]));
DONE;
@@ -3203,6 +3313,7 @@
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
@@ -3218,6 +3329,7 @@
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));