On Thu, 20 Jun 2024, Hu, Lin1 wrote: > > > else if (ret_elt_bits > arg_elt_bits) > > > modifier = WIDEN; > > > > > > + if (supportable_convert_operation (code, ret_type, arg_type, &code1)) > > > + { > > > + g = gimple_build_assign (lhs, code1, arg); > > > + gsi_replace (gsi, g, false); > > > + return; > > > + } > > > > Given the API change I suggest below it might make sense to have > > supportable_indirect_convert_operation do the above and represent it as > > single- > > step conversion? > > > > OK, if you want to supportable_indirect_convert_operation can do > something like supportable_convert_operation, I'll give it a try. This > functionality is really the part that this function can cover. But this > would require some changes not only the API change, because > supportable_indirect_convert_operation originally only supported Float > -> Int or Int ->Float.
I think I'd like to see a single API to handle direct and (multi-)indirect-level converts that operate on vectors with all the same number of lanes. > > > > > + code_helper code2 = ERROR_MARK, code3 = ERROR_MARK; > > > + int multi_step_cvt = 0; > > > + vec<tree> interm_types = vNULL; > > > + if (supportable_indirect_convert_operation (NULL, > > > + code, > > > + ret_type, arg_type, > > > + &code2, &code3, > > > + &multi_step_cvt, > > > + &interm_types, arg)) > > > + { > > > + new_rhs = make_ssa_name (interm_types[0]); > > > + g = gimple_build_assign (new_rhs, (tree_code) code3, arg); > > > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > > > + g = gimple_build_assign (lhs, (tree_code) code2, new_rhs); > > > + gsi_replace (gsi, g, false); > > > + return; > > > + } > > > + > > > if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == > > FLOAT_EXPR)) > > > { > > > - if (supportable_convert_operation (code, ret_type, arg_type, > > > &code1)) > > > - { > > > - g = gimple_build_assign (lhs, code1, arg); > > > - gsi_replace (gsi, g, false); > > > - return; > > > - } > > > /* Can't use get_compute_type here, as > > > supportable_convert_operation > > > doesn't necessarily use an optab and needs two arguments. */ > > > tree vec_compute_type > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index > > > 05a169ecb2d..0aa608202ca 100644 > > > --- a/gcc/tree-vect-stmts.cc > > > +++ b/gcc/tree-vect-stmts.cc > > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo, > > > tree scalar_dest; > > > tree op0, op1 = NULL_TREE; > > > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > > > - tree_code tc1, tc2; > > > + tree_code tc1; > > > code_helper code, code1, code2; > > > code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; > > > tree new_temp; > > > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo, > > > break; > > > } > > > > > > - /* For conversions between float and integer types try whether > > > - we can use intermediate signed integer types to support the > > > - conversion. */ > > > - if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) > > > - && (code == FLOAT_EXPR || > > > - (code == FIX_TRUNC_EXPR && !flag_trapping_math))) > > > - { > > > - bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE > > (lhs_mode); > > > - bool float_expr_p = code == FLOAT_EXPR; > > > - unsigned short target_size; > > > - scalar_mode intermediate_mode; > > > - if (demotion) > > > - { > > > - intermediate_mode = lhs_mode; > > > - target_size = GET_MODE_SIZE (rhs_mode); > > > - } > > > - else > > > - { > > > - target_size = GET_MODE_SIZE (lhs_mode); > > > - if (!int_mode_for_size > > > - (GET_MODE_BITSIZE (rhs_mode), 0).exists > > (&intermediate_mode)) > > > - goto unsupported; > > > - } > > > - code1 = float_expr_p ? code : NOP_EXPR; > > > - codecvt1 = float_expr_p ? NOP_EXPR : code; > > > - opt_scalar_mode mode_iter; > > > - FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) > > > - { > > > - intermediate_mode = mode_iter.require (); > > > - > > > - if (GET_MODE_SIZE (intermediate_mode) > target_size) > > > - break; > > > - > > > - scalar_mode cvt_mode; > > > - if (!int_mode_for_size > > > - (GET_MODE_BITSIZE (intermediate_mode), 0).exists > > (&cvt_mode)) > > > - break; > > > - > > > - cvt_type = build_nonstandard_integer_type > > > - (GET_MODE_BITSIZE (cvt_mode), 0); > > > - > > > - /* Check if the intermediate type can hold OP0's range. > > > - When converting from float to integer this is not necessary > > > - because values that do not fit the (smaller) target type are > > > - unspecified anyway. */ > > > - if (demotion && float_expr_p) > > > - { > > > - wide_int op_min_value, op_max_value; > > > - if (!vect_get_range_info (op0, &op_min_value, > > &op_max_value)) > > > - break; > > > - > > > - if (cvt_type == NULL_TREE > > > - || (wi::min_precision (op_max_value, SIGNED) > > > - > TYPE_PRECISION (cvt_type)) > > > - || (wi::min_precision (op_min_value, SIGNED) > > > - > TYPE_PRECISION (cvt_type))) > > > - continue; > > > - } > > > - > > > - cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, > > > slp_node); > > > - /* This should only happened for SLP as long as loop vectorizer > > > - only supports same-sized vector. */ > > > - if (cvt_type == NULL_TREE > > > - || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in) > > > - || !supportable_convert_operation ((tree_code) code1, > > > - vectype_out, > > > - cvt_type, &tc1) > > > - || !supportable_convert_operation ((tree_code) codecvt1, > > > - cvt_type, > > > - vectype_in, &tc2)) > > > - continue; > > > - > > > - found_mode = true; > > > - break; > > > - } > > > + if (supportable_indirect_convert_operation (vinfo, > > > + code, > > > + vectype_out, > > > + vectype_in, > > > + &code1, > > > + &codecvt1, > > > + &multi_step_cvt, > > > + &interm_types, > > > + op0,slp_node)) > > > + break; > > > > > > - if (found_mode) > > > - { > > > - multi_step_cvt++; > > > - interm_types.safe_push (cvt_type); > > > - cvt_type = NULL_TREE; > > > - code1 = tc1; > > > - codecvt1 = tc2; > > > - break; > > > - } > > > - } > > > /* FALLTHRU */ > > > unsupported: > > > if (dump_enabled_p ()) > > > @@ -14626,6 +14551,153 @@ supportable_narrowing_operation > > (code_helper code, > > > return false; > > > } > > > > > > +/* Function supportable_indirect_convert_operation > > > + > > > + Check whether an operation represented by the code CODE is two > > > + convert operations that are supported by the target platform in > > > + vector form (i.e., when operating on arguments of type VECTYPE_IN > > > + producing a result of type VECTYPE_OUT). > > > + > > > + Convert operations we currently support directly are FIX_TRUNC and > > > FLOAT. > > > + This function checks if these operations are supported > > > + by the target platform directly (via vector tree-codes). > > > + > > > + Output: > > > + - CODE1 is the code of a vector operation to be used when > > > + converting the operation in the first step, if available. > > > + - CODE2 is the code of a vector operation to be used when > > > + converting the operation in the second step, if available. > > > + - MULTI_STEP_CVT determines the number of required intermediate steps > > in > > > + case of multi-step conversion (like int->short->char - in that case > > > + MULTI_STEP_CVT will be 1). In the function, it should be 1. > > > + - INTERM_TYPES contains the intermediate type required to perform the > > > + convert operation (short in the above example). */ > > > +bool > > > +supportable_indirect_convert_operation (vec_info *vinfo, > > > + code_helper code, > > > + tree vectype_out, > > > + tree vectype_in, > > > + code_helper *code1, > > > + code_helper *code2, > > > + int *multi_step_cvt, > > > + vec<tree> *interm_types, > > > > This API is somewhat awkward, as we're inventing a new one I guess we can do > > better. I think we want > > > > vec<std::pair<tree, tree_code> > *converts, > > > > covering all code1, code2, multi_step_cvt and interm_types with the > > conversion > > sequence being > > > > converts[0].first tem0 = converts[0].second op0; > > converts[1].first tem1 = converts[1].second tem; > > > > That's great, this really makes the function work better. > > > > > ... while converts.length () determines the length of the chain, one being > > a direct > > conversion where then converts[0].first is vectype_out. That would allow > > double -> char to go double -> float -> int -> short -> char for example. > > > > I'm trying to determine the requirements, do you want this function to > support multiple conversions (the current implementation just does a > two-step conversion, like double -> char, which becomes double -> int -> > char). Actually we should be able to do all conversions in two steps, if > we have some suitable instructions. I can't think of a scenario where > multiple conversions are needed yet. Could you give me some examples? Of > course, I could tweak this feature in advance if it is for future > consideration. I think the API should support multi-level, not only two levels. The implementation doesn't need to cover that case unless we run into such a requirement. Usually vector ISAs allow 2x integer widening/shortening but not 4x, so a VnDImode -> VnQImode conversion would need to go via VnSImode and VnHImode (of course some targets might "help" the vectorizer by providing a VnDImode -> VnQImode pattern that does the intermediate conversions behind the vectorizers back). But yes, the original motivation for the vectorizer code was that float<->int conversions are limited. Thanks, Richard. > > > > > > + tree op0, > > > + slp_tree slp_node) > > > > I would like to avoid passing VINFO and SLP_NODE here, see below. > > The same is true for OP0 where the existing use is wrong for SLP already, > > but I > > guess that can stay for now (I opened PR115538 about the wrong-code issue). > > > > Thanks, I have removed them. > > > > > > +{ > > > + bool found_mode = false; > > > + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out)); > > > + scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in)); > > > + opt_scalar_mode mode_iter; > > > + tree_code tc1, tc2; > > > + > > > + tree cvt_type = NULL_TREE; > > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in); > > > + > > > + (*multi_step_cvt) = 0; > > > + /* For conversions between float and integer types try whether > > > + we can use intermediate signed integer types to support the > > > + conversion. */ > > > + if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) > > > + && (code == FLOAT_EXPR > > > + || (code == FIX_TRUNC_EXPR && !flag_trapping_math))) > > > + { > > > + bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE > > (lhs_mode); > > > + bool float_expr_p = code == FLOAT_EXPR; > > > + unsigned short target_size; > > > + scalar_mode intermediate_mode; > > > + if (demotion) > > > + { > > > + intermediate_mode = lhs_mode; > > > + target_size = GET_MODE_SIZE (rhs_mode); > > > + } > > > + else > > > + { > > > + target_size = GET_MODE_SIZE (lhs_mode); > > > + if (!int_mode_for_size > > > + (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode)) > > > + return false; > > > + } > > > + *code1 = float_expr_p ? code : NOP_EXPR; > > > + *code2 = float_expr_p ? NOP_EXPR : code; > > > + opt_scalar_mode mode_iter; > > > + FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) > > > + { > > > + intermediate_mode = mode_iter.require (); > > > + > > > + if (GET_MODE_SIZE (intermediate_mode) > target_size) > > > + break; > > > + > > > + scalar_mode cvt_mode; > > > + if (!int_mode_for_size > > > + (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode)) > > > + break; > > > + > > > + cvt_type = build_nonstandard_integer_type > > > + (GET_MODE_BITSIZE (cvt_mode), 0); > > > + > > > + /* Check if the intermediate type can hold OP0's range. > > > + When converting from float to integer this is not necessary > > > + because values that do not fit the (smaller) target type are > > > + unspecified anyway. */ > > > + if (demotion && float_expr_p) > > > + { > > > + wide_int op_min_value, op_max_value; > > > + /* For vector form, it looks like op0 doesn't have RANGE_INFO. > > > + In the future, if it is supported, changes may need to be made > > > + to this part, such as checking the RANGE of each element > > > + in the vector. */ > > > + if (!SSA_NAME_RANGE_INFO (op0) > > > + || !vect_get_range_info (op0, &op_min_value, > > &op_max_value)) > > > + break; > > > + > > > + if (cvt_type == NULL_TREE > > > + || (wi::min_precision (op_max_value, SIGNED) > > > + > TYPE_PRECISION (cvt_type)) > > > + || (wi::min_precision (op_min_value, SIGNED) > > > + > TYPE_PRECISION (cvt_type))) > > > + continue; > > > + } > > > + > > > + if (vinfo != NULL && slp_node != NULL) > > > + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node); > > > + else > > > + { > > > + bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out)) > > > + || TYPE_UNSIGNED (TREE_TYPE (vectype_in)); > > > + cvt_type = build_nonstandard_integer_type > > > + (GET_MODE_BITSIZE (cvt_mode), uns); > > > + cvt_type = build_vector_type (cvt_type, nelts); > > > + } > > > > So this would then become > > > > cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE > > (vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS (vectype_in)); > > > > > + /* This should only happened for SLP as long as loop vectorizer > > > + only supports same-sized vector. */ > > > + if (cvt_type == NULL_TREE > > > + || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts) > > > + || !supportable_convert_operation ((tree_code) *code1, > > > + vectype_out, > > > + cvt_type, &tc1) > > > + || !supportable_convert_operation ((tree_code) *code2, > > > + cvt_type, > > > + vectype_in, &tc2)) > > > + continue; > > > + > > > + found_mode = true; > > > + break; > > > + } > > > + > > > + if (found_mode) > > > + { > > > + (*multi_step_cvt)++; > > > + interm_types->safe_push (cvt_type); > > > + cvt_type = NULL_TREE; > > > + *code1 = tc1; > > > + *code2 = tc2; > > > + return true; > > > + } > > > + } > > > + interm_types->release (); > > > > Hmm, ownership of interm_types is somewhat unclear here - the caller should > > release it, or is the situation that the caller is confused by stray > > elements in it? In > > that case I'd suggest to instead do interm_types->truncate (0). > > > > It's my fault, I just imitate supportable_narrowing/widening_operation, > I think for this function, interm_types->release() is not needed.