Now for __builtin_popcountl we are getting things like:

	vrepli.b	$vr0,0
	vinsgr2vr.d	$vr0,$r4,0
	vpcnt.d	$vr0,$vr0
	vpickve2gr.du	$r4,$vr0,0
	slli.w	$r4,$r4,0
	jr	$r1

The "vrepli.b" instruction is introduced by the init-regs pass (see PR61810 and all the issues it references). To work around it, we can use a post-reload split (define_insn_and_split) instead of define_expand: the "f" constraint will make the compiler automatically move the scalar between GPR and FPR, and reload runs much later than init-regs, so init-regs won't get in our way. Now the code looks like:

	movgr2fr.d	$f0,$r4
	vpcnt.d	$vr0,$vr0
	movfr2gr.d	$r4,$f0
	jr	$r1

gcc/ChangeLog: * config/loongarch/loongarch.md (cntmap): Change to uppercase. (popcount<GPR:mode>2): Modify to a post reload split. --- Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk? gcc/config/loongarch/loongarch.md | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 6f507c3c7f6..478f859051c 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1732,21 +1732,23 @@ (define_insn "truncdfsf2" ;; This attribute used for get connection of scalar mode and corresponding ;; vector mode. -(define_mode_attr cntmap [(SI "v4si") (DI "v2di")]) +(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")]) -(define_expand "popcount<mode>2" - [(set (match_operand:GPR 0 "register_operand") - (popcount:GPR (match_operand:GPR 1 "register_operand")))] +(define_insn_and_split "popcount<mode>2" + [(set (match_operand:GPR 0 "register_operand" "=f") + (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))] "ISA_HAS_LSX" + "#" + ;; Do the split very late to work around the unneeded zero- + ;; initialization from init-regs. See PR61810 and all the referenced + ;; issues. + "&& reload_completed" + [(set (match_operand:<cntmap> 0 "register_operand" "=f") + (popcount:<cntmap> + (match_operand:<cntmap> 1 "register_operand" "f")))] { - rtx in = operands[1]; - rtx out = operands[0]; - rtx vreg = <MODE>mode == SImode ? 
gen_reg_rtx (V4SImode) : - gen_reg_rtx (V2DImode); - emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1))); - emit_insn (gen_popcount<cntmap>2 (vreg, vreg)); - emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0))); - DONE; + operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0])); + operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1])); }) ;; -- 2.48.1