This adds support for detection of the ADDHN pattern in the vectorizer. Concretely try to detect
_1 = (W)a _2 = (W)b _3 = _1 + _2 _4 = _3 >> (precision(a) / 2) _5 = (N)_4 where W = precision (a) * 2 N = precision (a) / 2 Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, x86_64-pc-linux-gnu -m32, -m64 and no issues. Ok for master? Tests in the next patch which adds the optabs to AArch64. Thanks, Tamar gcc/ChangeLog: * internal-fn.def (VEC_ADD_HALFING_NARROW, IFN_VEC_ADD_HALFING_NARROW_LO, IFN_VEC_ADD_HALFING_NARROW_HI, IFN_VEC_ADD_HALFING_NARROW_EVEN, IFN_VEC_ADD_HALFING_NARROW_ODD): New. * internal-fn.cc (commutative_binary_fn_p): Add IFN_VEC_ADD_HALFING_NARROW, IFN_VEC_ADD_HALFING_NARROW_LO and IFN_VEC_ADD_HALFING_NARROW_EVEN. (commutative_ternary_fn_p): Add IFN_VEC_ADD_HALFING_NARROW_HI, IFN_VEC_ADD_HALFING_NARROW_ODD. * match.pd (add_half_narrowing_p): New. * optabs.def (vec_saddh_narrow_optab, vec_saddh_narrow_hi_optab, vec_saddh_narrow_lo_optab, vec_saddh_narrow_odd_optab, vec_saddh_narrow_even_optab, vec_uaddh_narrow_optab, vec_uaddh_narrow_hi_optab, vec_uaddh_narrow_lo_optab, vec_uaddh_narrow_odd_optab, vec_uaddh_narrow_even_optab): New. * tree-vect-patterns.cc (gimple_add_half_narrowing_p): New. (vect_recog_add_halfing_narrow_pattern): New. (vect_vect_recog_func_ptrs): Use it. * doc/generic.texi: Document them. * doc/md.texi: Likewise. --- diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index d4ac580a7a8b9cd339d26cb97f7eb963f83746a4..b32d99d4d1aad244a493d8f67b66151ff5363d0e 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1834,6 +1834,11 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. 
@tindex IFN_VEC_WIDEN_MINUS_LO @tindex IFN_VEC_WIDEN_MINUS_EVEN @tindex IFN_VEC_WIDEN_MINUS_ODD +@tindex IFN_VEC_ADD_HALFING_NARROW +@tindex IFN_VEC_ADD_HALFING_NARROW_HI +@tindex IFN_VEC_ADD_HALFING_NARROW_LO +@tindex IFN_VEC_ADD_HALFING_NARROW_EVEN +@tindex IFN_VEC_ADD_HALFING_NARROW_ODD @tindex VEC_UNPACK_HI_EXPR @tindex VEC_UNPACK_LO_EXPR @tindex VEC_UNPACK_FLOAT_HI_EXPR @@ -1956,6 +1961,51 @@ vector of @code{N/2} subtractions. In the case of vector are subtracted from the odd @code{N/2} of the first to produce the vector of @code{N/2} subtractions. +@item IFN_VEC_ADD_HALFING_NARROW +This internal function represents widening vector addition of two input +vectors, extracting the top half of the result and narrow that value to a type +half that of the original input. +Congretely it does @code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)}. Its operands +are vectors that contain the same number of elements (@code{N}) of the same +integral type. The result is a vector that contains the same amount (@code{N}) +of elements, of an integral type whose size is twice as narrow, as the input +vectors. If the current target does not implement the corresponding optabs the +vectorizer may choose to split it into either a pair +of @code{IFN_VEC_ADD_HALFING_NARROW_HI} and @code{IFN_VEC_ADD_HALFING_NARROW_LO} +or @code{IFN_VEC_ADD_HALFING_NARROW_EVEN} and +@code{IFN_VEC_ADD_HALFING_NARROW_ODD}, depending on what optabs the target +implements. + +@item IFN_VEC_ADD_HALFING_NARROW_HI +@itemx IFN_VEC_ADD_HALFING_NARROW_LO +This internal function represents widening vector addition of two input +vectors, extracting the top half of the result and narrow that value to a type +half that of the original input inserting the result as the high or low half of +the result vector. +Congretely it does @code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)}. Their +operands are vectors that contain the same number of elements (@code{N}) of the +same integral type. 
The result is a vector that contains half as many elements, +of an integral type whose size is twice as narrow. In the case of +@code{IFN_VEC_ADD_HALFING_NARROW_HI} the high @code{N/2} elements of the result +is inserted into the given result vector with the low elements left untouched. +The operation is a RMW. In the case of @code{IFN_VEC_ADD_HALFING_NARROW_LO} the +low @code{N/2} elements of the result is used as the full result. + +@item IFN_VEC_ADD_HALFING_NARROW_EVEN +@itemx IFN_VEC_ADD_HALFING_NARROW_ODD +This internal function represents widening vector addition of two input +vectors, extracting the top half of the result and narrow that value to a type +half that of the original input inserting the result as the even or odd parts of +the result vector. +Congretely it does @code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)}. Their +operands are vectors that contain the same number of elements (@code{N}) of the +same integral type. The result is a vector that contains half as many elements, +of an integral type whose size is twice as narrow. In the case of +@code{IFN_VEC_ADD_HALFING_NARROW_ODD} the odd @code{N/2} elements of the result +is inserted into the given result vector with the even elements left untouched. +The operation is a RMW. In the case of @code{IFN_VEC_ADD_HALFING_NARROW_EVEN} +the even @code{N/2} elements of the result is used as the full result. + @item VEC_UNPACK_HI_EXPR @itemx VEC_UNPACK_LO_EXPR These nodes represent unpacking of the high and low parts of the input vector, diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index aba93f606eca59d31c103a05b2567fd4f3be55f3..cb691b56f137a0037f5178ba853911df5a65e5a7 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -6087,6 +6087,21 @@ vectors with N signed/unsigned elements of size S@. Find the absolute difference between operands 1 and 2 and widen the resulting elements. Put the N/2 results of size 2*S in the output vector (operand 0). 
+@cindex @code{vec_saddh_narrow_hi_@var{m}} instruction pattern +@cindex @code{vec_saddh_narrow_lo_@var{m}} instruction pattern +@cindex @code{vec_uaddh_narrow_hi_@var{m}} instruction pattern +@cindex @code{vec_uaddh_narrow_lo_@var{m}} instruction pattern +@item @samp{vec_uaddh_narrow_hi_@var{m}}, @samp{vec_uaddh_narrow_lo_@var{m}} +@itemx @samp{vec_saddh_narrow_hi_@var{m}}, @samp{vec_saddh_narrow_lo_@var{m}} +@item @samp{vec_uaddh_narrow_even_@var{m}}, @samp{vec_uaddh_narrow_even_@var{m}} +@itemx @samp{vec_saddh_narrow_odd_@var{m}}, @samp{vec_saddh_narrow_odd_@var{m}} +Signed/Unsigned widening add long extract high half and narrow. Operands 1 and +2 are vectors with N signed/unsigned elements of size S@. Add the high/low +elements of 1 and 2 together in a widened precision, extract the top half and +narrow the result to half the size of S@ abd store the results in the output +vector (operand 0). Congretely it does +@code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)} + @cindex @code{vec_addsub@var{m}3} instruction pattern @item @samp{vec_addsub@var{m}3} Alternating subtract, add with even lanes doing subtract and odd diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 83438dd2ff57474cec999adaeabe92c0540e2a51..e600dbc4b3a0b27f78be00d52f7f6a54a13d7241 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4442,6 +4442,9 @@ commutative_binary_fn_p (internal_fn fn) case IFN_VEC_WIDEN_PLUS_HI: case IFN_VEC_WIDEN_PLUS_EVEN: case IFN_VEC_WIDEN_PLUS_ODD: + case IFN_VEC_ADD_HALFING_NARROW: + case IFN_VEC_ADD_HALFING_NARROW_LO: + case IFN_VEC_ADD_HALFING_NARROW_EVEN: return true; default: @@ -4462,6 +4465,8 @@ commutative_ternary_fn_p (internal_fn fn) case IFN_FNMA: case IFN_FNMS: case IFN_UADDC: + case IFN_VEC_ADD_HALFING_NARROW_HI: + case IFN_VEC_ADD_HALFING_NARROW_ODD: return true; default: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 69677dd10b980c83dec36487b1214ff066f4789b..152895f043b3ca60294b79c8301c6ff4014b955d 100644 --- a/gcc/internal-fn.def 
+++ b/gcc/internal-fn.def @@ -463,6 +463,12 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD, first, vec_widen_sabd, vec_widen_uabd, binary) +DEF_INTERNAL_NARROWING_OPTAB_FN (VEC_ADD_HALFING_NARROW, + ECF_CONST | ECF_NOTHROW, + first, + vec_saddh_narrow, vec_uaddh_narrow, + binary, ternary) + DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary) DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary) diff --git a/gcc/match.pd b/gcc/match.pd index 66e8a78744931c0137b83c5633c3a273fb69f003..d9d9046a8dcb7e5ca7cdf7c83e1945289950dc51 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3181,6 +3181,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || POINTER_TYPE_P (itype)) && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) +/* Detect (n)(((w)x + (w)y) >> bitsize(y)) where w is twice the bitsize of x and + y and n is half the bitsize of x and y. */ +(match (add_half_narrowing_p @0 @1) + (convert1? (rshift (plus:c (convert@3 @0) (convert @1)) INTEGER_CST@2)) + (with { unsigned n = TYPE_PRECISION (type); + unsigned w = TYPE_PRECISION (TREE_TYPE (@3)); + unsigned x = TYPE_PRECISION (TREE_TYPE (@0)); } + (if (INTEGRAL_TYPE_P (type) + && n == x / 2 + && w == x * 2 + && wi::eq_p (wi::to_wide (@2), x / 2))))) + /* Saturation add for unsigned integer. 
*/ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)) (match (usadd_overflow_mask @0 @1) diff --git a/gcc/optabs.def b/gcc/optabs.def index 87a8b85da1592646d0a3447572e842ceb158cd97..e226d85ddba7e43dd801faec61cac0372286314a 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -492,6 +492,16 @@ OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a") OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a") OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a") OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a") +OPTAB_D (vec_saddh_narrow_optab, "vec_saddh_narrow$a") +OPTAB_D (vec_saddh_narrow_hi_optab, "vec_saddh_narrow_hi_$a") +OPTAB_D (vec_saddh_narrow_lo_optab, "vec_saddh_narrow_lo_$a") +OPTAB_D (vec_saddh_narrow_odd_optab, "vec_saddh_narrow_odd_$a") +OPTAB_D (vec_saddh_narrow_even_optab, "vec_saddh_narrow_even_$a") +OPTAB_D (vec_uaddh_narrow_optab, "vec_uaddh_narrow$a") +OPTAB_D (vec_uaddh_narrow_hi_optab, "vec_uaddh_narrow_hi_$a") +OPTAB_D (vec_uaddh_narrow_lo_optab, "vec_uaddh_narrow_lo_$a") +OPTAB_D (vec_uaddh_narrow_odd_optab, "vec_uaddh_narrow_odd_$a") +OPTAB_D (vec_uaddh_narrow_even_optab, "vec_uaddh_narrow_even_$a") OPTAB_D (vec_addsub_optab, "vec_addsub$a3") OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4") OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4") diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index ffb320fbf2330522f25a9f4380f4744079a42306..b590c36fad23e44ec3fb954a4d2bb856ce3fc139 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4768,6 +4768,64 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, return NULL; } +extern bool gimple_add_half_narrowing_p (tree, tree*, tree (*)(tree)); + +/* + * Try to detect add halfing and narrowing pattern. 
+ * + * _1 = (W)a + * _2 = (W)b + * _3 = _1 + _2 + * _4 = _3 >> (precision(a) / 2) + * _5 = (N)_4 + * + * where + * W = precision (a) * 2 + * N = precision (a) / 2 + */ + +static gimple * +vect_recog_add_halfing_narrow_pattern (vec_info *vinfo, + stmt_vec_info stmt_vinfo, + tree *type_out) +{ + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); + + if (!is_gimple_assign (last_stmt)) + return NULL; + + tree ops[2]; + tree lhs = gimple_assign_lhs (last_stmt); + + if (gimple_add_half_narrowing_p (lhs, ops, NULL)) + { + tree itype = TREE_TYPE (ops[0]); + tree otype = TREE_TYPE (lhs); + tree v_itype = get_vectype_for_scalar_type (vinfo, itype); + tree v_otype = get_vectype_for_scalar_type (vinfo, otype); + internal_fn ifn = IFN_VEC_ADD_HALFING_NARROW; + + if (v_itype != NULL_TREE && v_otype != NULL_TREE + && direct_internal_fn_supported_p (ifn, v_itype, OPTIMIZE_FOR_BOTH)) + { + gcall *call = gimple_build_call_internal (ifn, 2, ops[0], ops[1]); + tree in_ssa = vect_recog_temp_ssa_var (otype, NULL); + + gimple_call_set_lhs (call, in_ssa); + gimple_call_set_nothrow (call, /* nothrow_p */ false); + gimple_set_location (call, + gimple_location (STMT_VINFO_STMT (stmt_vinfo))); + + *type_out = v_otype; + vect_pattern_detected ("vect_recog_add_halfing_narrow_pattern", + last_stmt); + return call; + } + } + + return NULL; +} + /* Detect a signed division by a constant that wouldn't be otherwise vectorized: @@ -6896,6 +6954,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_bitfield_ref_pattern, "bitfield_ref" }, { vect_recog_bit_insert_pattern, "bit_insert" }, { vect_recog_abd_pattern, "abd" }, + { vect_recog_add_halfing_narrow_pattern, "addhn" }, { vect_recog_over_widening_pattern, "over_widening" }, /* Must come after over_widening, which narrows the shift as much as possible beforehand. */ --
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index d4ac580a7a8b9cd339d26cb97f7eb963f83746a4..b32d99d4d1aad244a493d8f67b66151ff5363d0e 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1834,6 +1834,11 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. @tindex IFN_VEC_WIDEN_MINUS_LO @tindex IFN_VEC_WIDEN_MINUS_EVEN @tindex IFN_VEC_WIDEN_MINUS_ODD +@tindex IFN_VEC_ADD_HALFING_NARROW +@tindex IFN_VEC_ADD_HALFING_NARROW_HI +@tindex IFN_VEC_ADD_HALFING_NARROW_LO +@tindex IFN_VEC_ADD_HALFING_NARROW_EVEN +@tindex IFN_VEC_ADD_HALFING_NARROW_ODD @tindex VEC_UNPACK_HI_EXPR @tindex VEC_UNPACK_LO_EXPR @tindex VEC_UNPACK_FLOAT_HI_EXPR @@ -1956,6 +1961,51 @@ vector of @code{N/2} subtractions. In the case of vector are subtracted from the odd @code{N/2} of the first to produce the vector of @code{N/2} subtractions. +@item IFN_VEC_ADD_HALFING_NARROW +This internal function represents widening vector addition of two input +vectors, extracting the top half of the result and narrow that value to a type +half that of the original input. +Congretely it does @code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)}. Its operands +are vectors that contain the same number of elements (@code{N}) of the same +integral type. The result is a vector that contains the same amount (@code{N}) +of elements, of an integral type whose size is twice as narrow, as the input +vectors. If the current target does not implement the corresponding optabs the +vectorizer may choose to split it into either a pair +of @code{IFN_VEC_ADD_HALFING_NARROW_HI} and @code{IFN_VEC_ADD_HALFING_NARROW_LO} +or @code{IFN_VEC_ADD_HALFING_NARROW_EVEN} and +@code{IFN_VEC_ADD_HALFING_NARROW_ODD}, depending on what optabs the target +implements. 
+ +@item IFN_VEC_ADD_HALFING_NARROW_HI +@itemx IFN_VEC_ADD_HALFING_NARROW_LO +This internal function represents widening vector addition of two input +vectors, extracting the top half of the result and narrow that value to a type +half that of the original input inserting the result as the high or low half of +the result vector. +Congretely it does @code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)}. Their +operands are vectors that contain the same number of elements (@code{N}) of the +same integral type. The result is a vector that contains half as many elements, +of an integral type whose size is twice as narrow. In the case of +@code{IFN_VEC_ADD_HALFING_NARROW_HI} the high @code{N/2} elements of the result +is inserted into the given result vector with the low elements left untouched. +The operation is a RMW. In the case of @code{IFN_VEC_ADD_HALFING_NARROW_LO} the +low @code{N/2} elements of the result is used as the full result. + +@item IFN_VEC_ADD_HALFING_NARROW_EVEN +@itemx IFN_VEC_ADD_HALFING_NARROW_ODD +This internal function represents widening vector addition of two input +vectors, extracting the top half of the result and narrow that value to a type +half that of the original input inserting the result as the even or odd parts of +the result vector. +Congretely it does @code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)}. Their +operands are vectors that contain the same number of elements (@code{N}) of the +same integral type. The result is a vector that contains half as many elements, +of an integral type whose size is twice as narrow. In the case of +@code{IFN_VEC_ADD_HALFING_NARROW_ODD} the odd @code{N/2} elements of the result +is inserted into the given result vector with the even elements left untouched. +The operation is a RMW. In the case of @code{IFN_VEC_ADD_HALFING_NARROW_EVEN} +the even @code{N/2} elements of the result is used as the full result. 
+ @item VEC_UNPACK_HI_EXPR @itemx VEC_UNPACK_LO_EXPR These nodes represent unpacking of the high and low parts of the input vector, diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index aba93f606eca59d31c103a05b2567fd4f3be55f3..cb691b56f137a0037f5178ba853911df5a65e5a7 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -6087,6 +6087,21 @@ vectors with N signed/unsigned elements of size S@. Find the absolute difference between operands 1 and 2 and widen the resulting elements. Put the N/2 results of size 2*S in the output vector (operand 0). +@cindex @code{vec_saddh_narrow_hi_@var{m}} instruction pattern +@cindex @code{vec_saddh_narrow_lo_@var{m}} instruction pattern +@cindex @code{vec_uaddh_narrow_hi_@var{m}} instruction pattern +@cindex @code{vec_uaddh_narrow_lo_@var{m}} instruction pattern +@item @samp{vec_uaddh_narrow_hi_@var{m}}, @samp{vec_uaddh_narrow_lo_@var{m}} +@itemx @samp{vec_saddh_narrow_hi_@var{m}}, @samp{vec_saddh_narrow_lo_@var{m}} +@item @samp{vec_uaddh_narrow_even_@var{m}}, @samp{vec_uaddh_narrow_even_@var{m}} +@itemx @samp{vec_saddh_narrow_odd_@var{m}}, @samp{vec_saddh_narrow_odd_@var{m}} +Signed/Unsigned widening add long extract high half and narrow. Operands 1 and +2 are vectors with N signed/unsigned elements of size S@. Add the high/low +elements of 1 and 2 together in a widened precision, extract the top half and +narrow the result to half the size of S@ abd store the results in the output +vector (operand 0). 
Congretely it does +@code{((|bits(a)/2|)((a w+ b) >> |bits(a)/2|)} + @cindex @code{vec_addsub@var{m}3} instruction pattern @item @samp{vec_addsub@var{m}3} Alternating subtract, add with even lanes doing subtract and odd diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 83438dd2ff57474cec999adaeabe92c0540e2a51..e600dbc4b3a0b27f78be00d52f7f6a54a13d7241 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4442,6 +4442,9 @@ commutative_binary_fn_p (internal_fn fn) case IFN_VEC_WIDEN_PLUS_HI: case IFN_VEC_WIDEN_PLUS_EVEN: case IFN_VEC_WIDEN_PLUS_ODD: + case IFN_VEC_ADD_HALFING_NARROW: + case IFN_VEC_ADD_HALFING_NARROW_LO: + case IFN_VEC_ADD_HALFING_NARROW_EVEN: return true; default: @@ -4462,6 +4465,8 @@ commutative_ternary_fn_p (internal_fn fn) case IFN_FNMA: case IFN_FNMS: case IFN_UADDC: + case IFN_VEC_ADD_HALFING_NARROW_HI: + case IFN_VEC_ADD_HALFING_NARROW_ODD: return true; default: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 69677dd10b980c83dec36487b1214ff066f4789b..152895f043b3ca60294b79c8301c6ff4014b955d 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -463,6 +463,12 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD, first, vec_widen_sabd, vec_widen_uabd, binary) +DEF_INTERNAL_NARROWING_OPTAB_FN (VEC_ADD_HALFING_NARROW, + ECF_CONST | ECF_NOTHROW, + first, + vec_saddh_narrow, vec_uaddh_narrow, + binary, ternary) + DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary) DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary) diff --git a/gcc/match.pd b/gcc/match.pd index 66e8a78744931c0137b83c5633c3a273fb69f003..d9d9046a8dcb7e5ca7cdf7c83e1945289950dc51 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3181,6 +3181,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || POINTER_TYPE_P (itype)) && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) +/* Detect (n)(((w)x + (w)y) >> bitsize(y)) where w is twice the bitsize of x and + y and n is half the bitsize of x and y. 
*/ +(match (add_half_narrowing_p @0 @1) + (convert1? (rshift (plus:c (convert@3 @0) (convert @1)) INTEGER_CST@2)) + (with { unsigned n = TYPE_PRECISION (type); + unsigned w = TYPE_PRECISION (TREE_TYPE (@3)); + unsigned x = TYPE_PRECISION (TREE_TYPE (@0)); } + (if (INTEGRAL_TYPE_P (type) + && n == x / 2 + && w == x * 2 + && wi::eq_p (wi::to_wide (@2), x / 2))))) + /* Saturation add for unsigned integer. */ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)) (match (usadd_overflow_mask @0 @1) diff --git a/gcc/optabs.def b/gcc/optabs.def index 87a8b85da1592646d0a3447572e842ceb158cd97..e226d85ddba7e43dd801faec61cac0372286314a 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -492,6 +492,16 @@ OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a") OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a") OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a") OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a") +OPTAB_D (vec_saddh_narrow_optab, "vec_saddh_narrow$a") +OPTAB_D (vec_saddh_narrow_hi_optab, "vec_saddh_narrow_hi_$a") +OPTAB_D (vec_saddh_narrow_lo_optab, "vec_saddh_narrow_lo_$a") +OPTAB_D (vec_saddh_narrow_odd_optab, "vec_saddh_narrow_odd_$a") +OPTAB_D (vec_saddh_narrow_even_optab, "vec_saddh_narrow_even_$a") +OPTAB_D (vec_uaddh_narrow_optab, "vec_uaddh_narrow$a") +OPTAB_D (vec_uaddh_narrow_hi_optab, "vec_uaddh_narrow_hi_$a") +OPTAB_D (vec_uaddh_narrow_lo_optab, "vec_uaddh_narrow_lo_$a") +OPTAB_D (vec_uaddh_narrow_odd_optab, "vec_uaddh_narrow_odd_$a") +OPTAB_D (vec_uaddh_narrow_even_optab, "vec_uaddh_narrow_even_$a") OPTAB_D (vec_addsub_optab, "vec_addsub$a3") OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4") OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4") diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index ffb320fbf2330522f25a9f4380f4744079a42306..b590c36fad23e44ec3fb954a4d2bb856ce3fc139 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4768,6 +4768,64 @@ vect_recog_sat_trunc_pattern (vec_info 
*vinfo, stmt_vec_info stmt_vinfo, return NULL; } +extern bool gimple_add_half_narrowing_p (tree, tree*, tree (*)(tree)); + +/* + * Try to detect add halfing and narrowing pattern. + * + * _1 = (W)a + * _2 = (W)b + * _3 = _1 + _2 + * _4 = _3 >> (precision(a) / 2) + * _5 = (N)_4 + * + * where + * W = precision (a) * 2 + * N = precision (a) / 2 + */ + +static gimple * +vect_recog_add_halfing_narrow_pattern (vec_info *vinfo, + stmt_vec_info stmt_vinfo, + tree *type_out) +{ + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); + + if (!is_gimple_assign (last_stmt)) + return NULL; + + tree ops[2]; + tree lhs = gimple_assign_lhs (last_stmt); + + if (gimple_add_half_narrowing_p (lhs, ops, NULL)) + { + tree itype = TREE_TYPE (ops[0]); + tree otype = TREE_TYPE (lhs); + tree v_itype = get_vectype_for_scalar_type (vinfo, itype); + tree v_otype = get_vectype_for_scalar_type (vinfo, otype); + internal_fn ifn = IFN_VEC_ADD_HALFING_NARROW; + + if (v_itype != NULL_TREE && v_otype != NULL_TREE + && direct_internal_fn_supported_p (ifn, v_itype, OPTIMIZE_FOR_BOTH)) + { + gcall *call = gimple_build_call_internal (ifn, 2, ops[0], ops[1]); + tree in_ssa = vect_recog_temp_ssa_var (otype, NULL); + + gimple_call_set_lhs (call, in_ssa); + gimple_call_set_nothrow (call, /* nothrow_p */ false); + gimple_set_location (call, + gimple_location (STMT_VINFO_STMT (stmt_vinfo))); + + *type_out = v_otype; + vect_pattern_detected ("vect_recog_add_halfing_narrow_pattern", + last_stmt); + return call; + } + } + + return NULL; +} + /* Detect a signed division by a constant that wouldn't be otherwise vectorized: @@ -6896,6 +6954,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_bitfield_ref_pattern, "bitfield_ref" }, { vect_recog_bit_insert_pattern, "bit_insert" }, { vect_recog_abd_pattern, "abd" }, + { vect_recog_add_halfing_narrow_pattern, "addhn" }, { vect_recog_over_widening_pattern, "over_widening" }, /* Must come after over_widening, which narrows the shift as much as possible 
beforehand. */