This patch imposes the same sort of structure on aarch64-sve2.md as we already have for aarch64-sve.md, before it grows a lot more patterns.
Tested on aarch64-linux-gnu and applied as 280058.

Richard


2020-01-09  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
	* config/aarch64/aarch64-sve2.md: Add banner comments.
	(<su>mulh<r>s<mode>3): Move further up file.
	(<su>mull<bt><Vwide>, <r>shrnb<mode>, <r>shrnt<mode>)
	(*aarch64_sve2_sra<mode>): Move further down file.
	* config/aarch64/t-aarch64 (s-check-sve-md): Check aarch64-sve2.md
	too.

Index: gcc/config/aarch64/aarch64-sve2.md
===================================================================
--- gcc/config/aarch64/aarch64-sve2.md	2020-01-09 15:26:43.574129607 +0000
+++ gcc/config/aarch64/aarch64-sve2.md	2020-01-09 16:23:34.112124746 +0000
@@ -18,6 +18,75 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
+;; The file is organised into the following sections (search for the full
+;; line):
+;;
+;; == Uniform binary arithmetic
+;; ---- [INT] Scaled high-part multiplication
+;; ---- [INT] General binary arithmetic that maps to unspecs
+;;
+;; == Uniform ternary arithmetic
+;; ---- [INT] Ternary logic operations
+;; ---- [INT] Shift-and-accumulate operations
+;;
+;; == Extending arithmetic
+;; ---- [INT] Long binary arithmetic
+;;
+;; == Narrowing arithmetic
+;; ---- [INT] Narrowing right shifts
+;;
+;; == General
+;; ---- Check for aliases between pointers
+
+;; =========================================================================
+;; == Uniform binary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Scaled high-part multiplication
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer multiply-high-with-(round-and-)scale.
+(define_expand "<su>mulh<r>s<mode>3"
+  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
+	(unspec:SVE_FULL_BHSI
+	  [(match_dup 3)
+	   (unspec:SVE_FULL_BHSI
+	     [(match_operand:SVE_FULL_BHSI 1 "register_operand")
+	      (match_operand:SVE_FULL_BHSI 2 "register_operand")]
+	     MULHRS)]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  {
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+
+    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
+    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
+    emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2]));
+    emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2]));
+
+    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
+    emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift));
+    emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift));
+
+    DONE;
+  }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General binary arithmetic that maps to unspecs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SHADD
+;; - SHSUB
+;; - SRHADD
+;; - UHADD
+;; - UHSUB
+;; - URHADD
+;; -------------------------------------------------------------------------
+
 ;; Integer average (floor).
 (define_expand "<u>avg<mode>3_floor"
   [(set (match_operand:SVE_FULL_I 0 "register_operand")
@@ -67,85 +136,20 @@ (define_insn "*<sur>h<addsub><mode>"
   [(set_attr "movprfx" "*,yes")]
 )
 
-;; Multiply long top / bottom.
-(define_insn "<su>mull<bt><Vwide>" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> - [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") - (match_operand:SVE_FULL_BHSI 2 "register_operand" "w")] - MULLBT))] - "TARGET_SVE2" - "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>" -) - -;; (Rounding) Right shift narrow bottom. -(define_insn "<r>shrnb<mode>" - [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") - (unspec:SVE_FULL_BHSI - [(match_operand:<VWIDE> 1 "register_operand" "w") - (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")] - SHRNB))] - "TARGET_SVE2" - "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2" -) - -;; (Rounding) Right shift narrow top. -(define_insn "<r>shrnt<mode>" - [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") - (unspec:SVE_FULL_BHSI - [(match_operand:SVE_FULL_BHSI 1 "register_operand" "0") - (match_operand:<VWIDE> 2 "register_operand" "w") - (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")] - SHRNT))] - "TARGET_SVE2" - "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3" -) - -;; Unpredicated integer multiply-high-with-(round-and-)scale. -(define_expand "<su>mulh<r>s<mode>3" - [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") - (unspec:SVE_FULL_BHSI - [(match_dup 3) - (unspec:SVE_FULL_BHSI - [(match_operand:SVE_FULL_BHSI 1 "register_operand") - (match_operand:SVE_FULL_BHSI 2 "register_operand")] - MULHRS)] - UNSPEC_PRED_X))] - "TARGET_SVE2" - { - operands[3] = aarch64_ptrue_reg (<VPRED>mode); - - rtx prod_b = gen_reg_rtx (<VWIDE>mode); - rtx prod_t = gen_reg_rtx (<VWIDE>mode); - emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2])); - emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2])); - - rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1); - emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift)); - emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift)); - - DONE; - } -) - -;; Unpredicated signed / unsigned shift-right accumulate. -(define_insn_and_rewrite "*aarch64_sve2_sra<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") - (plus:SVE_FULL_I - (unspec:SVE_FULL_I - [(match_operand 4) - (SHIFTRT:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm" "Dr"))] - UNSPEC_PRED_X) - (match_operand:SVE_FULL_I 1 "register_operand" "0")))] - "TARGET_SVE2" - "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3" - "&& !CONSTANT_P (operands[4])" - { - operands[4] = CONSTM1_RTX (<VPRED>mode); - } -) +;; ========================================================================= +;; == Uniform ternary arithmnetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Ternary logic operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - BSL +;; - BSL1N +;; - BSL2N +;; - EOR3 +;; - NBSL +;; ------------------------------------------------------------------------- ;; Unpredicated 3-way exclusive OR. 
(define_insn "*aarch64_sve2_eor3<mode>" @@ -332,6 +336,106 @@ (define_insn_and_rewrite "*aarch64_sve2_ [(set_attr "movprfx" "*,yes")] ) +;; ------------------------------------------------------------------------- +;; ---- [INT] Shift-and-accumulate operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - SSRA +;; - USRA +;; ------------------------------------------------------------------------- + +;; Unpredicated signed / unsigned shift-right accumulate. +(define_insn_and_rewrite "*aarch64_sve2_sra<mode>" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") + (plus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand 4) + (SHIFTRT:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand" "w") + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm" "Dr"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 1 "register_operand" "0")))] + "TARGET_SVE2" + "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } +) + +;; ========================================================================= +;; == Extending arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Long binary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - SMULLB +;; - SMULLT +;; - UMULLB +;; - UMULLT +;; ------------------------------------------------------------------------- + +;; Multiply long top / bottom. +(define_insn "<su>mull<bt><Vwide>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (unspec:<VWIDE> + [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") + (match_operand:SVE_FULL_BHSI 2 "register_operand" "w")] + MULLBT))] + "TARGET_SVE2" + "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>" +) + +;; ========================================================================= +;; == Narrowing arithnetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Narrowing right shifts +;; ------------------------------------------------------------------------- +;; Includes: +;; - RSHRNB +;; - RSHRNT +;; - SHRNB +;; - SHRNT +;; ------------------------------------------------------------------------- + +;; (Rounding) Right shift narrow bottom. +(define_insn "<r>shrnb<mode>" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") + (unspec:SVE_FULL_BHSI + [(match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")] + SHRNB))] + "TARGET_SVE2" + "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2" +) + +;; (Rounding) Right shift narrow top. 
+(define_insn "<r>shrnt<mode>" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") + (unspec:SVE_FULL_BHSI + [(match_operand:SVE_FULL_BHSI 1 "register_operand" "0") + (match_operand:<VWIDE> 2 "register_operand" "w") + (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")] + SHRNT))] + "TARGET_SVE2" + "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3" +) + +;; ========================================================================= +;; == General +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Check for aliases between pointers +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic: WHILERW and WHILEWR are +;; defined in aarch64-sve.md instead. +;; ------------------------------------------------------------------------- + ;; Use WHILERW and WHILEWR to accelerate alias checks. This is only ;; possible if the accesses we're checking are exactly the same size ;; as an SVE vector. Index: gcc/config/aarch64/t-aarch64 =================================================================== --- gcc/config/aarch64/t-aarch64 2020-01-06 13:01:32.608504274 +0000 +++ gcc/config/aarch64/t-aarch64 2020-01-09 16:23:34.112124746 +0000 @@ -147,7 +147,10 @@ MULTILIB_DIRNAMES = $(subst $(comma), insn-conditions.md: s-check-sve-md s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \ - $(srcdir)/config/aarch64/aarch64-sve.md + $(srcdir)/config/aarch64/aarch64-sve.md \ + $(srcdir)/config/aarch64/aarch64-sve2.md $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ $(srcdir)/config/aarch64/aarch64-sve.md + $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ + $(srcdir)/config/aarch64/aarch64-sve2.md $(STAMP) s-check-sve-md