Hi Raphael, On 8/8/24 10:10, Raphael Moreira Zinsly wrote: > From: Raphael Zinsly <rzin...@ventanamicro.com> > > Improve handling of constants where its upper and lower 32-bit > halves are the same and Zbkb is not available in riscv_move_integer. > riscv_split_integer already handles this but the changes in > riscv_build_integer makes it possible to improve code generation for > negative values.
But... > e.g. for: > > unsigned long f (void) { return 0xf857f2def857f2deUL; } > > Without the patch: > > li a0,-128454656 > addi a0,a0,734 > li a5,-128454656 > addi a5,a5,735 > slli a5,a5,32 > add a0,a5,a0 > > With the patch: > > li a0,128454656 > addi a0,a0,-735 > slli a5,a0,32 > add a0,a0,a5 > xori a0,a0,-1 [snip...] > } > + else if (cost > 3 && TARGET_64BIT && can_create_pseudo_p ()) > + { > + struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS]; > + int alt_cost; > + > + unsigned HOST_WIDE_INT loval = value & 0xffffffff; > + unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32; > + bool bit31 = (hival & 0x80000000) != 0; > + /* Without pack we can generate it with a shift 32 followed by an or. > */ > + if (hival == loval && !bit31) > + { > + alt_cost = 2 + riscv_build_integer_1 (alt_codes, > + sext_hwi (loval, 32), mode); > + if (alt_cost < cost) > + { > + /* We need to save the first constant we build. */ > + alt_codes[alt_cost - 3].save_temporary = true; > + > + /* Now we want to shift the previously generated constant into the > + high half. */ > + alt_codes[alt_cost - 2].code = ASHIFT; > + alt_codes[alt_cost - 2].value = 32; > + alt_codes[alt_cost - 2].use_uw = false; > + alt_codes[alt_cost - 2].save_temporary = false; > + > + /* And the final step, IOR the two halves together. Since this > uses > + the saved temporary, use CONCAT similar to what we do for > Zbkb. */ > + alt_codes[alt_cost - 1].code = CONCAT; > + alt_codes[alt_cost - 1].value = 0; > + alt_codes[alt_cost - 1].use_uw = false; > + alt_codes[alt_cost - 1].save_temporary = false; > + > + memcpy (codes, alt_codes, sizeof (alt_codes)); > + cost = alt_cost; > + } > + } > + } As you mentioned above: positive values are already handled in riscv_split_integer after c104ef4b5eb1 ("RISC-V: improve codegen for large constants with same 32-bit lo and hi")parts [2] Meaning this is redundant with the code from above commit. If you prefer these changes (in light of patch 2/2 and/or zbkb changes tending to this code) IMO that code should be removed. FWIW at the time I did try (in vain) to handle negative values too but my change was broken [3] [3] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623365.html -Vineet > return cost; > } > @@ -2786,12 +2823,22 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT > value, > } > else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE) > { > - rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp; > - rtx t2 = codes[i].code == VEC_MERGE ? old_value : x; > - gcc_assert (t2); > - t2 = gen_lowpart (SImode, t2); > - emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2)); > - x = t; > + if (codes[i].code == CONCAT && !TARGET_ZBKB) > + { > + /* The two values should have no bits in common, so we can > + use PLUS instead of IOR which has a higher chance of > + using a compressed instruction. */ > + x = gen_rtx_PLUS (mode, x, old_value); > + } > + else > + { > + rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp; > + rtx t2 = codes[i].code == VEC_MERGE ? old_value : x; > + gcc_assert (t2); > + t2 = gen_lowpart (SImode, t2); > + emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2)); > + x = t; > + } > } > else > x = gen_rtx_fmt_ee (codes[i].code, mode, > diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-11.c > b/gcc/testsuite/gcc.target/riscv/synthesis-11.c > new file mode 100644 > index 00000000000..98401d5ca32 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/synthesis-11.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target rv64 } */ > +/* We aggressively skip as we really just need to test the basic synthesis > + which shouldn't vary based on the optimization level. -O1 seems to work > + and eliminates the usual sources of extraneous dead code that would throw > + off the counts. */ > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } > } */ > +/* { dg-options "-march=rv64gc" } */ > + > +/* Rather than test for a specific synthesis of all these constants or > + having thousands of tests each testing one variant, we just test the > + total number of instructions. > + > + This isn't expected to change much and any change is worthy of a look. */ > +/* { dg-final { scan-assembler-times > "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 114 > } } */ > + > + > + > +unsigned long foo_0x7857f2de7857f2de(void) { return 0x7857f2de7857f2deUL; } > +unsigned long foo_0x19660e6319660e63(void) { return 0x19660e6319660e63UL; } > +unsigned long foo_0x137f1b75137f1b75(void) { return 0x137f1b75137f1b75UL; } > +unsigned long foo_0x35019fa035019fa0(void) { return 0x35019fa035019fa0UL; } > +unsigned long foo_0x3828e6c13828e6c1(void) { return 0x3828e6c13828e6c1UL; } > +unsigned long foo_0x039d87e9039d87e9(void) { return 0x039d87e9039d87e9UL; } > +unsigned long foo_0x429617c1429617c1(void) { return 0x429617c1429617c1UL; } > +unsigned long foo_0x2411811924118119(void) { return 0x2411811924118119UL; } > +unsigned long foo_0x0c01df7d0c01df7d(void) { return 0x0c01df7d0c01df7dUL; } > +unsigned long foo_0x70e23d6b70e23d6b(void) { return 0x70e23d6b70e23d6bUL; } > +unsigned long foo_0xf857f2def857f2de(void) { return 0xf857f2def857f2deUL; } > +unsigned long foo_0x99660e6399660e63(void) { return 0x99660e6399660e63UL; } > +unsigned long foo_0x937f1b75937f1b75(void) { return 0x937f1b75937f1b75UL; } > +unsigned long foo_0xb5019fa0b5019fa0(void) { return 0xb5019fa0b5019fa0UL; } > +unsigned long foo_0xb828e6c1b828e6c1(void) { return 0xb828e6c1b828e6c1UL; } > +unsigned long foo_0x839d87e9839d87e9(void) { return 0x839d87e9839d87e9UL; } > +unsigned long foo_0xc29617c1c29617c1(void) { return 0xc29617c1c29617c1UL; } > +unsigned long foo_0xa4118119a4118119(void) { return 0xa4118119a4118119UL; } > +unsigned long foo_0x8c01df7d8c01df7d(void) { return 0x8c01df7d8c01df7dUL; } > +unsigned long foo_0xf0e23d6bf0e23d6b(void) { return 0xf0e23d6bf0e23d6bUL; } > +unsigned long foo_0x7fff00007fff0000(void) { return 0x7fff00007fff0000UL; } > +