Ping. On Sun, 2024-06-16 at 01:50 +0800, Xi Ruoyao wrote: > Consider > > c &= 0xfff; > a &= ~0xfff; > b &= ~0xfff; > a |= c; > b |= c; > > This can be done with 2 bstrins instructions. But we need to > recognize > it in loongarch_rtx_costs or the compiler will not propagate "c & > 0xfff" > forward. > > gcc/ChangeLog: > > * config/loongarch/loongarch.cc: > (loongarch_use_bstrins_for_ior_with_mask): Split the main > logic > into ... > (loongarch_use_bstrins_for_ior_with_mask_1): ... here. > (loongarch_rtx_costs): Special case for IORs that can be > implemented with bstrins. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/bstrins-3.c: New test. > --- > > Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk? > > gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++---- > - > .../gcc.target/loongarch/bstrins-3.c | 16 ++++ > 2 files changed, 72 insertions(+), 17 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c > > diff --git a/gcc/config/loongarch/loongarch.cc > b/gcc/config/loongarch/loongarch.cc > index 6ec3ee62502..256b76d044b 100644 > --- a/gcc/config/loongarch/loongarch.cc > +++ b/gcc/config/loongarch/loongarch.cc > @@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode > mode, unsigned int units) > return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); > } > > +static int > +loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode, > + unsigned HOST_WIDE_INT > mask1, > + unsigned HOST_WIDE_INT > mask2) > +{ > + if (mask1 != ~mask2 || !mask1 || !mask2) > + return 0; > + > + /* Try to avoid a right-shift. */ > + if (low_bitmask_len (mode, mask1) != -1) > + return -1; > + > + if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) > + return 1; > + > + if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) > + return -1; > + > + return 0; > +} > + > /* Return the cost of moving between two registers of mode MODE. 
*/ > > static int > @@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, > int outer_code, > /* Fall through. */ > > case IOR: > + { > + rtx op[2] = {XEXP (x, 0), XEXP (x, 1)}; > + if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND > + && (mode == SImode || (TARGET_64BIT && mode == DImode))) > + { > + rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], > 1); > + if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1)) > + { > + unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0); > + unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1); > + if (loongarch_use_bstrins_for_ior_with_mask_1 (mode, > + mask0, > + > mask1)) > + { > + /* A bstrins instruction */ > + *total = COSTS_N_INSNS (1); > + > + /* A srai instruction */ > + if (low_bitmask_len (mode, mask0) == -1 > + && low_bitmask_len (mode, mask1) == -1) > + *total += COSTS_N_INSNS (1); > + > + for (int i = 0; i < 2; i++) > + *total += set_src_cost (XEXP (op[i], 0), mode, > speed); > + > + return true; > + } > + } > + } > + } > + > + /* Fall through. */ > case XOR: > /* Double-word operations use two single-word operations. */ > *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), > COSTS_N_INSNS (2), > @@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void) > int > loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) > { > - unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]); > - unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]); > - > - if (mask1 != ~mask2 || !mask1 || !mask2) > - return 0; > - > - /* Try to avoid a right-shift. 
*/ > - if (low_bitmask_len (mode, mask1) != -1) > - return -1; > - > - if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) > - return 1; > - > - if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) > - return -1; > - > - return 0; > + return loongarch_use_bstrins_for_ior_with_mask_1 (mode, > + UINTVAL (op[2]), > + UINTVAL (op[4])); > } > > /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto > diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c > b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c > new file mode 100644 > index 00000000000..13762bdef42 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-rtl-final" } */ > +/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */ > + > +struct X { > + long a, b; > +}; > + > +struct X > +test (long a, long b, long c) > +{ > + c &= 0xfff; > + a &= ~0xfff; > + b &= ~0xfff; > + return (struct X){.a = a | c, .b = b | c}; > +}
-- Xi Ruoyao <xry...@xry111.site> School of Aerospace Science and Technology, Xidian University