Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > @@ -31086,36 +31087,20 @@ arm_expand_vector_compare (rtx target, rtx_code > code, rtx op0, rtx op1, > case NE: > if (TARGET_HAVE_MVE) > { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > switch (GET_MODE_CLASS (cmp_mode)) > { > case MODE_VECTOR_INT: > - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg > (cmp_mode, op1))); > + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, op0, force_reg > (cmp_mode, op1)));
Pre-existing nit: the emit_insn (gen_mve_vcmpq (...)) line just above is too long. The same applies to the later calls in the same function. Richard > break; > case MODE_VECTOR_FLOAT: > if (TARGET_HAVE_MVE_FLOAT) > - emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, > force_reg (cmp_mode, op1))); > + emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target, op0, > force_reg (cmp_mode, op1))); > else > gcc_unreachable (); > break; > default: > gcc_unreachable (); > } > - > - /* If we are not expanding a vcond, build the result here. */ > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, > one, zero, vpr_p0)); > - } > } > else > emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); > @@ -31127,23 +31112,7 @@ arm_expand_vector_compare (rtx target, rtx_code > code, rtx op0, rtx op1, > case GEU: > case GTU: > if (TARGET_HAVE_MVE) > - { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg > (cmp_mode, op1))); > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, > one, zero, vpr_p0)); > - } > - } > + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, op0, force_reg > (cmp_mode, op1))); > else > emit_insn (gen_neon_vc (code, cmp_mode, target, > op0, force_reg (cmp_mode, op1))); > @@ -31154,23 +31123,7 @@ arm_expand_vector_compare (rtx target, rtx_code > code, rtx op0, rtx op1, > case LEU: > case LTU: > if (TARGET_HAVE_MVE) > - { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = 
gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > - emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, > force_reg (cmp_mode, op1), op0)); > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, > one, zero, vpr_p0)); > - } > - } > + emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target, > force_reg (cmp_mode, op1), op0)); > else > emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, > target, force_reg (cmp_mode, op1), op0)); > @@ -31185,8 +31138,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, > rtx op0, rtx op1, > rtx gt_res = gen_reg_rtx (cmp_result_mode); > rtx alt_res = gen_reg_rtx (cmp_result_mode); > rtx_code alt_code = (code == LTGT ? LT : LE); > - if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve) > - || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, > vcond_mve)) > + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) > + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) > gcc_unreachable (); > emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, > gt_res, alt_res))); > @@ -31206,19 +31159,15 @@ arm_expand_vcond (rtx *operands, machine_mode > cmp_result_mode) > { > /* When expanding for MVE, we do not want to emit a (useless) vpsel in > arm_expand_vector_compare, and another one here. 
*/ > - bool vcond_mve=false; > rtx mask; > > if (TARGET_HAVE_MVE) > - { > - vcond_mve=true; > - mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode)); > - } > + mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ()); > else > mask = gen_reg_rtx (cmp_result_mode); > > bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), > - operands[4], operands[5], true, > vcond_mve); > + operands[4], operands[5], true); > if (inverted) > std::swap (operands[1], operands[2]); > if (TARGET_NEON) > @@ -31226,20 +31175,20 @@ arm_expand_vcond (rtx *operands, machine_mode > cmp_result_mode) > mask, operands[1], operands[2])); > else > { > - machine_mode cmp_mode = GET_MODE (operands[4]); > - rtx vpr_p0 = mask; > - rtx zero = gen_reg_rtx (cmp_mode); > - rtx one = gen_reg_rtx (cmp_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_mode)); > + machine_mode cmp_mode = GET_MODE (operands[0]); > + > switch (GET_MODE_CLASS (cmp_mode)) > { > case MODE_VECTOR_INT: > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], > one, zero, vpr_p0)); > + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0], > + operands[1], operands[2], mask)); > break; > case MODE_VECTOR_FLOAT: > if (TARGET_HAVE_MVE_FLOAT) > - emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, > vpr_p0)); > + emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], > + operands[1], operands[2], mask)); > + else > + gcc_unreachable (); > break; > default: > gcc_unreachable (); > @@ -34149,4 +34098,15 @@ arm_mode_base_reg_class (machine_mode mode) > > struct gcc_target targetm = TARGET_INITIALIZER; > > +/* Implement TARGET_VECTORIZE_GET_MASK_MODE. 
*/ > + > +opt_machine_mode > +arm_get_mask_mode (machine_mode mode) > +{ > + if (TARGET_HAVE_MVE) > + return arm_mode_to_pred_mode (mode); > + > + return default_get_mask_mode (mode); > +} > + > #include "gt-arm.h" > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 9da78657798..fb25cac1cfd 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -10540,3 +10540,57 @@ (define_insn "*mve_mov<mode>" > vmsr%?\t P0, %1 > vmrs%?\t %0, P0" > ) > + > +;; Expanders for vec_cmp and vcond > + > +(define_expand "vec_cmp<mode><MVE_vpred>" > + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand") > + (match_operator:<MVE_VPRED> 1 "comparison_operator" > + [(match_operand:MVE_VLD_ST 2 "s_register_operand") > + (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))] > + "TARGET_HAVE_MVE > + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vec_cmpu<mode><MVE_vpred>" > + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand") > + (match_operator:<MVE_VPRED> 1 "comparison_operator" > + [(match_operand:MVE_2 2 "s_register_operand") > + (match_operand:MVE_2 3 "reg_or_zero_operand")]))] > + "TARGET_HAVE_MVE" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vcond_mask_<mode><MVE_vpred>" > + [(set (match_operand:MVE_VLD_ST 0 "s_register_operand") > + (if_then_else:MVE_VLD_ST > + (match_operand:<MVE_VPRED> 3 "s_register_operand") > + (match_operand:MVE_VLD_ST 1 "s_register_operand") > + (match_operand:MVE_VLD_ST 2 "s_register_operand")))] > + "TARGET_HAVE_MVE" > +{ > + switch (GET_MODE_CLASS (<MODE>mode)) > + { > + case MODE_VECTOR_INT: > + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], > + operands[1], operands[2], operands[3])); > + break; > + case MODE_VECTOR_FLOAT: > + if 
(TARGET_HAVE_MVE_FLOAT) > + emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0], > + operands[1], operands[2], operands[3])); > + else > + gcc_unreachable (); > + break; > + default: > + gcc_unreachable (); > + } > + DONE; > +}) > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 8b0a396947c..28310d93a4e 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -1394,6 +1394,45 @@ (define_insn "*us_sub<mode>_neon" > [(set_attr "type" "neon_qsub<q>")] > ) > > +(define_expand "vec_cmp<mode><v_cmp_result>" > + [(set (match_operand:<V_cmp_result> 0 "s_register_operand") > + (match_operator:<V_cmp_result> 1 "comparison_operator" > + [(match_operand:VDQWH 2 "s_register_operand") > + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] > + "TARGET_NEON > + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vec_cmpu<mode><mode>" > + [(set (match_operand:VDQIW 0 "s_register_operand") > + (match_operator:VDQIW 1 "comparison_operator" > + [(match_operand:VDQIW 2 "s_register_operand") > + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] > + "TARGET_NEON" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vcond_mask_<mode><v_cmp_result>" > + [(set (match_operand:VDQWH 0 "s_register_operand") > + (if_then_else:VDQWH > + (match_operand:<V_cmp_result> 3 "s_register_operand") > + (match_operand:VDQWH 1 "s_register_operand") > + (match_operand:VDQWH 2 "s_register_operand")))] > + "TARGET_NEON > + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > +{ > + emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1], > + operands[2])); > + DONE; > +}) > + > ;; Patterns for builtins. > > ; good for plain vadd, vaddq. 
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > index 68de4f0f943..9b461a76155 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -363,33 +363,6 @@ (define_expand "vlshr<mode>3" > } > }) > > -(define_expand "vec_cmp<mode><v_cmp_result>" > - [(set (match_operand:<V_cmp_result> 0 "s_register_operand") > - (match_operator:<V_cmp_result> 1 "comparison_operator" > - [(match_operand:VDQWH 2 "s_register_operand") > - (match_operand:VDQWH 3 "reg_or_zero_operand")]))] > - "ARM_HAVE_<MODE>_ARITH > - && !TARGET_REALLY_IWMMXT > - && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > -{ > - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > - operands[2], operands[3], false, false); > - DONE; > -}) > - > -(define_expand "vec_cmpu<mode><mode>" > - [(set (match_operand:VDQIW 0 "s_register_operand") > - (match_operator:VDQIW 1 "comparison_operator" > - [(match_operand:VDQIW 2 "s_register_operand") > - (match_operand:VDQIW 3 "reg_or_zero_operand")]))] > - "ARM_HAVE_<MODE>_ARITH > - && !TARGET_REALLY_IWMMXT" > -{ > - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > - operands[2], operands[3], false, false); > - DONE; > -}) > - > ;; Conditional instructions. These are comparisons with conditional moves > for > ;; vectors. 
They perform the assignment: > ;; > @@ -461,31 +434,6 @@ (define_expand "vcondu<mode><v_cmp_result>" > DONE; > }) > > -(define_expand "vcond_mask_<mode><v_cmp_result>" > - [(set (match_operand:VDQWH 0 "s_register_operand") > - (if_then_else:VDQWH > - (match_operand:<V_cmp_result> 3 "s_register_operand") > - (match_operand:VDQWH 1 "s_register_operand") > - (match_operand:VDQWH 2 "s_register_operand")))] > - "ARM_HAVE_<MODE>_ARITH > - && !TARGET_REALLY_IWMMXT > - && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > -{ > - if (TARGET_NEON) > - { > - emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3], > - operands[1], operands[2])); > - } > - else if (TARGET_HAVE_MVE) > - { > - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], > - operands[1], operands[2], operands[3])); > - } > - else > - gcc_unreachable (); > - DONE; > -}) > - > (define_expand "vec_load_lanesoi<mode>" > [(set (match_operand:OI 0 "s_register_operand") > (unspec:OI [(match_operand:OI 1 "neon_struct_operand")