On Thu, 2017-10-26 at 17:18 +0200, Richard Biener wrote: > On Thu, Oct 26, 2017 at 5:13 PM, Richard Biener > <richard.guent...@gmail.com> wrote: > > On Thu, Oct 26, 2017 at 4:30 PM, Will Schmidt <will_schm...@vnet.ibm.com> > > wrote: > >> On Thu, 2017-10-26 at 11:05 +0200, Richard Biener wrote: > >>> On Wed, Oct 25, 2017 at 4:38 PM, Will Schmidt <will_schm...@vnet.ibm.com> > >>> wrote: > >>> > Hi, > >>> > > >>> > Add support for gimple folding of the vec_madd() (vector multiply-add) > >>> > intrinsics. > >>> > Testcase coverage is provided by the existing tests > >>> > gcc.target/powerpc/fold-vec-madd-*.c > >>> > > >>> > Sniff-tests appear clean. A full regtest is currently running across > >>> > assorted Power systems. (P6-P9). > >>> > OK for trunk (pending clean run results)? > >>> > >>> You can use FMA_EXPR on integer operands as well. Otherwise you risk > >>> the FMA be not matched by combine later when part of the operation is > >>> CSEd. > >> > >> I had tried that initially, without success,.. I'll probably need > >> another hint. :-) > >> Looking a bit closer, I think I see why the assert fired, but I'm not > >> sure what the proper fix would be. > >> > >> So attempting to the FMA_EXPR on the integer operands. (vector shorts in > >> this case), I end up triggering this error: > >> > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/testsuite/gcc.target/powerpc/fold-vec-madd-short.c:14:10: > >> internal compiler error: in expand_expr_real_2, at expr.c:8712 > >> 0x10813303 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, > >> expand_modifier) > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8712 > >> 0x1081822f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, > >> expand_modifier, rtx_def**, bool) > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:9787 > >> 0x1080f7bb expand_expr_real(tree_node*, rtx_def*, machine_mode, > >> expand_modifier, rtx_def**, bool) > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8084 > >> ... > >> > >> > >> which when followed back, I tripped an assert here: (gcc/expr.c: > >> expand_expr_real_2() ~ line 8710) > >> > >> case FMA_EXPR: > >> { > >> optab opt = fma_optab; > >> gimple *def0, *def2; > >> if (optab_handler (fma_optab, mode) == CODE_FOR_nothing) > >> { > >> tree fn = mathfn_built_in (TREE_TYPE (treeop0), BUILT_IN_FMA); > >> tree call_expr; > >> > >> gcc_assert (fn != NULL_TREE); > >> > >> where gcc/builtins.c > >> mathfn_built_in()->mathfn_built_in_1->mathfn_built_in_2 looks to have > >> returned END_BUILTINS/NULL_TREE, due to falling through the if/else > >> tree: > >> > >> if (TYPE_MAIN_VARIANT (type) == double_type_node) > >> return fcode; > >> else if (TYPE_MAIN_VARIANT (type) == float_type_node) > >> return fcodef; > >> else if (TYPE_MAIN_VARIANT (type) == long_double_type_node) > >> return fcodel; > >> else > >> return END_BUILTINS; > >> > >> Looks like that is all double/float/long double contents. First blush > >> attempt would be to add V8HI_type_node/integer_type_node to that if/else > >> tree, but that doesn't look like it would be near enough. > > > > Well - we of course expect to have an optab for the fma with vector > > short. I thought > > you had one given you have the intrinsic. If you don't have an optab > > you of course > > have to open-code it. > > > > Just thought you expected an actual machine instruction doing the integer > > FMA. > > So you have > > (define_insn "altivec_vmladduhm" > [(set (match_operand:V8HI 0 "register_operand" "=v") > (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "register_operand" "v") > (match_operand:V8HI 2 "register_operand" "v")) > (match_operand:V8HI 3 "register_operand" "v")))] > "TARGET_ALTIVEC" > "vmladduhm %0,%1,%2,%3" > [(set_attr "type" "veccomplex")]) > > but not > > (define_expand "fmav8hi4" > ... > > or define_insn in case that's also a way to register an optab. > > Richard.
Ok. Thanks for the guidance. :-) -Will > > > > > Richard. > > > >> Thanks > >> -Will > >> > >>> > >>> Richard. > >>> > >>> > Thanks, > >>> > -Will > >>> > > >>> > [gcc] > >>> > > >>> > 2017-10-25 Will Schmidt <will_schm...@vnet.ibm.com> > >>> > > >>> > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add > >>> > support for > >>> > gimple folding of vec_madd() intrinsics. > >>> > > >>> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > >>> > index 4837e14..04c2b15 100644 > >>> > --- a/gcc/config/rs6000/rs6000.c > >>> > +++ b/gcc/config/rs6000/rs6000.c > >>> > @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin > >>> > (gimple_stmt_iterator *gsi) > >>> > build_int_cst (arg2_type, > >>> > 0)), arg0); > >>> > gimple_set_location (g, loc); > >>> > gsi_replace (gsi, g, true); > >>> > return true; > >>> > } > >>> > + > >>> > + /* vec_madd (Float) */ > >>> > + case ALTIVEC_BUILTIN_VMADDFP: > >>> > + case VSX_BUILTIN_XVMADDDP: > >>> > + { > >>> > + arg0 = gimple_call_arg (stmt, 0); > >>> > + arg1 = gimple_call_arg (stmt, 1); > >>> > + tree arg2 = gimple_call_arg (stmt, 2); > >>> > + lhs = gimple_call_lhs (stmt); > >>> > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, > >>> > arg2); > >>> > + gimple_set_location (g, gimple_location (stmt)); > >>> > + gsi_replace (gsi, g, true); > >>> > + return true; > >>> > + } > >>> > + /* vec_madd (Integral) */ > >>> > + case ALTIVEC_BUILTIN_VMLADDUHM: > >>> > + { > >>> > + arg0 = gimple_call_arg (stmt, 0); > >>> > + arg1 = gimple_call_arg (stmt, 1); > >>> > + tree arg2 = gimple_call_arg (stmt, 2); > >>> > + lhs = gimple_call_lhs (stmt); > >>> > + tree lhs_type = TREE_TYPE (lhs); > >>> > + location_t loc = gimple_location (stmt); > >>> > + gimple_seq stmts = NULL; > >>> > + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, > >>> > + lhs_type, arg0, arg1); > >>> > + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, > >>> > + lhs_type, mult_result, arg2); > >>> > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > >>> > + update_call_from_tree (gsi, plus_result); > >>> > + return true; > >>> > + } > >>> > + > >>> > default: > >>> > if (TARGET_DEBUG_BUILTIN) > >>> > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s > >>> > %s\n", > >>> > fn_code, fn_name1, fn_name2); > >>> > break; > >>> > > >>> > > >>> > >> > >> >