The following patch to gfortran's trans-intrinsic.c tweaks the GENERIC that is produced
for poppar on integer(kind=16). Currently, the double word poppar is implemented as parityll(hipart(x))^parityll(lopart(x)), but with this patch it is now translated as parityll(hipart(x)^lopart(x)). This will be just an aesthetic change once my tree-level parity optimization patch of 12th June is reviewed and accepted, but generating the more efficient form initially avoids a tiny bit of garbage collection when the middle-end cleans this up into its preferred form. The semantics/correctness of this change are tested by the run-time tests in gfortran.dg/popcnt_poppar_2.F90. This patch has been tested with "make bootstrap" and "make -k check" on x86_64-pc-linux-gnu with no regressions. If approved, I'd very much appreciate it if the (gfortran) reviewer could commit this change for me. 2020-06-14 Roger Sayle <ro...@bextmovesoftware.com> * trans-intrinsic.c (gfc_conv_intrinsic_popcnt_poppar): Translate poppar(kind=16) as parityll(hipart(x)^lopart(x)) instead of parityll(hipart(x))^parityll(lopart(x)). Thanks in advance, Roger -- Roger Sayle NextMove Software Cambridge, UK
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index fd88099..363874e 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -7153,35 +7153,39 @@ gfc_conv_intrinsic_popcnt_poppar (gfc_se * se, gfc_expr *expr, int parity) as 'long long'. */ gcc_assert (argsize == 2 * LONG_LONG_TYPE_SIZE); - func = builtin_decl_explicit (parity - ? BUILT_IN_PARITYLL - : BUILT_IN_POPCOUNTLL); - /* Convert it to an integer, and store into a variable. */ utype = gfc_build_uint_type (argsize); arg = fold_convert (utype, arg); arg = gfc_evaluate_now (arg, &se->pre); - - /* Call the builtin twice. */ - call1 = build_call_expr_loc (input_location, func, 1, - fold_convert (long_long_unsigned_type_node, - arg)); - - arg2 = fold_build2_loc (input_location, RSHIFT_EXPR, utype, arg, - build_int_cst (utype, LONG_LONG_TYPE_SIZE)); - call2 = build_call_expr_loc (input_location, func, 1, - fold_convert (long_long_unsigned_type_node, - arg2)); + arg_type = long_long_unsigned_type_node; /* Combine the results. */ if (parity) - se->expr = fold_build2_loc (input_location, BIT_XOR_EXPR, result_type, - call1, call2); + { + /* Construct parityll (LOPART (arg) ^ HIPART (arg)) */ + arg2 = fold_build2_loc (input_location, RSHIFT_EXPR, utype, arg, + build_int_cst (utype, LONG_LONG_TYPE_SIZE)); + arg = fold_build2_loc (input_location, BIT_XOR_EXPR, arg_type, + fold_convert (arg_type, arg), + fold_convert (arg_type, arg2)); + func = builtin_decl_explicit (BUILT_IN_PARITYLL); + argsize = LONG_LONG_TYPE_SIZE; + } else - se->expr = fold_build2_loc (input_location, PLUS_EXPR, result_type, - call1, call2); - - return; + { + func = builtin_decl_explicit (BUILT_IN_POPCOUNTLL); + + /* Call the builtin twice. 
*/ + call1 = build_call_expr_loc (input_location, func, 1, + fold_convert (arg_type, arg)); + arg2 = fold_build2_loc (input_location, RSHIFT_EXPR, utype, arg, + build_int_cst (utype, LONG_LONG_TYPE_SIZE)); + call2 = build_call_expr_loc (input_location, func, 1, + fold_convert (arg_type, arg2)); + se->expr = fold_build2_loc (input_location, PLUS_EXPR, result_type, + call1, call2); + return; + } } /* Convert the actual argument twice: first, to the unsigned type of the