While I still can't fully understand the immediate load issue and how this patch fix it, I've tested this patch (alongside the prefetch instruction patch) with bootstrap-ubsan. And the compiled result of imm-load1.c seems OK.
On Thu, 2022-11-17 at 17:59 +0800, Lulu Cheng wrote: > v1 -> v2: > 1. Change the code format. > 2. Fix bugs in the code. > > v2 -> v3: > Modifying a code implementation of an undefined behavior. > > v3 -> v4: > Move the part of the immediate number decomposition from expand pass > to split > pass. > > Both regression tests and spec2006 passed. > > The problem mentioned in the link does not move the four immediate > load > instructions out of the loop. It has been optimized. Now, as in the > test case, > four immediate load instructions are generated outside the loop. > ( > https://sourceware.org/pipermail/libc-alpha/2022-September/142202.html) > > -------------------------------------------------------------------- > Because loop2_invariant pass will extract the instructions that do not > change > in the loop out of the loop, some instructions will not meet the > extraction > conditions if the machine performs immediate decomposition while > expand pass, > so the immediate decomposition will be transferred to the split > process. > > gcc/ChangeLog: > > * config/loongarch/loongarch.cc (enum > loongarch_load_imm_method): > Remove the member METHOD_INSV that is not currently used. > (struct loongarch_integer_op): Define a new member curr_value, > that records the value of the number stored in the destination > register immediately after the current instruction has run. > (loongarch_build_integer): Assign a value to the curr_value > member variable. > (loongarch_move_integer): Adds information for the immediate > load instruction. > * config/loongarch/loongarch.md (*movdi_32bit): Redefine as > define_insn_and_split. > (*movdi_64bit): Likewise. > (*movsi_internal): Likewise. > (*movhi_internal): Likewise. > * config/loongarch/predicates.md: Return true as long as it is > CONST_INT, ensure > that the immediate number is not optimized by decomposition > during expand > optimization loop. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/imm-load.c: New test. > * gcc.target/loongarch/imm-load1.c: New test. > --- > gcc/config/loongarch/loongarch.cc | 62 ++++++++++-------- > - > gcc/config/loongarch/loongarch.md | 44 +++++++++++-- > gcc/config/loongarch/predicates.md | 2 +- > gcc/testsuite/gcc.target/loongarch/imm-load.c | 10 +++ > .../gcc.target/loongarch/imm-load1.c | 26 ++++++++ > 5 files changed, 110 insertions(+), 34 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c > create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load1.c > > diff --git a/gcc/config/loongarch/loongarch.cc > b/gcc/config/loongarch/loongarch.cc > index 8ee32c90573..9e0d6c7c3ea 100644 > --- a/gcc/config/loongarch/loongarch.cc > +++ b/gcc/config/loongarch/loongarch.cc > @@ -139,22 +139,21 @@ struct loongarch_address_info > > METHOD_LU52I: > Load 52-63 bit of the immediate number. > - > - METHOD_INSV: > - immediate like 0xfff00000fffffxxx > - */ > +*/ > enum loongarch_load_imm_method > { > METHOD_NORMAL, > METHOD_LU32I, > - METHOD_LU52I, > - METHOD_INSV > + METHOD_LU52I > }; > > struct loongarch_integer_op > { > enum rtx_code code; > HOST_WIDE_INT value; > + /* Represent the result of the immediate count of the load > instruction at > + each step. */ > + HOST_WIDE_INT curr_value; > enum loongarch_load_imm_method method; > }; > > @@ -1475,24 +1474,27 @@ loongarch_build_integer (struct > loongarch_integer_op *codes, > { > /* The value of the lower 32 bit be loaded with one > instruction. > lu12i.w. */ > - codes[0].code = UNKNOWN; > - codes[0].method = METHOD_NORMAL; > - codes[0].value = low_part; > + codes[cost].code = UNKNOWN; > + codes[cost].method = METHOD_NORMAL; > + codes[cost].value = low_part; > + codes[cost].curr_value = low_part; > cost++; > } > else > { > /* lu12i.w + ior. */ > - codes[0].code = UNKNOWN; > - codes[0].method = METHOD_NORMAL; > - codes[0].value = low_part & ~(IMM_REACH - 1); > + codes[cost].code = UNKNOWN; > + codes[cost].method = METHOD_NORMAL; > + codes[cost].value = low_part & ~(IMM_REACH - 1); > + codes[cost].curr_value = codes[cost].value; > cost++; > HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); > if (iorv != 0) > { > - codes[1].code = IOR; > - codes[1].method = METHOD_NORMAL; > - codes[1].value = iorv; > + codes[cost].code = IOR; > + codes[cost].method = METHOD_NORMAL; > + codes[cost].value = iorv; > + codes[cost].curr_value = low_part; > cost++; > } > } > @@ -1515,11 +1517,14 @@ loongarch_build_integer (struct > loongarch_integer_op *codes, > { > codes[cost].method = METHOD_LU52I; > codes[cost].value = value & LU52I_B; > + codes[cost].curr_value = value; > return cost + 1; > } > > codes[cost].method = METHOD_LU32I; > codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0); > + codes[cost].curr_value = (value & 0xfffffffffffff) > + | (sign51 ? LU52I_B : 0); > cost++; > > /* Determine whether the 52-61 bits are sign-extended from the > low order, > @@ -1528,6 +1533,7 @@ loongarch_build_integer (struct > loongarch_integer_op *codes, > { > codes[cost].method = METHOD_LU52I; > codes[cost].value = value & LU52I_B; > + codes[cost].curr_value = value; > cost++; > } > } > @@ -2911,6 +2917,9 @@ loongarch_move_integer (rtx temp, rtx dest, > unsigned HOST_WIDE_INT value) > else > x = force_reg (mode, x); > > + set_unique_reg_note (get_last_insn (), REG_EQUAL, > + GEN_INT (codes[i-1].curr_value)); > + > switch (codes[i].method) > { > case METHOD_NORMAL: > @@ -2918,22 +2927,17 @@ loongarch_move_integer (rtx temp, rtx dest, > unsigned HOST_WIDE_INT value) > GEN_INT (codes[i].value)); > break; > case METHOD_LU32I: > - emit_insn ( > - gen_rtx_SET (x, > - gen_rtx_IOR (DImode, > - gen_rtx_ZERO_EXTEND ( > - DImode, gen_rtx_SUBREG > (SImode, x, 0)), > - GEN_INT (codes[i].value)))); > + gcc_assert (mode == DImode); > + x = gen_rtx_IOR (DImode, > + gen_rtx_ZERO_EXTEND (DImode, > + gen_rtx_SUBREG > (SImode, x, 0)), > + GEN_INT (codes[i].value)); > break; > case METHOD_LU52I: > - emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), > - GEN_INT (codes[i].value))); > - break; > - case METHOD_INSV: > - emit_insn ( > - gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT > (20), > - GEN_INT (32)), > - gen_rtx_REG (DImode, 0))); > + gcc_assert (mode == DImode); > + x = gen_rtx_IOR (DImode, > + gen_rtx_AND (DImode, x, GEN_INT > (0xfffffffffffff)), > + GEN_INT (codes[i].value)); > break; > default: > gcc_unreachable (); > diff --git a/gcc/config/loongarch/loongarch.md > b/gcc/config/loongarch/loongarch.md > index 2fda5381904..f61db66d535 100644 > --- a/gcc/config/loongarch/loongarch.md > +++ b/gcc/config/loongarch/loongarch.md > @@ -1718,23 +1718,41 @@ (define_expand "movdi" > DONE; > }) > > -(define_insn "*movdi_32bit" > +(define_insn_and_split "*movdi_32bit" > [(set (match_operand:DI 0 "nonimmediate_operand" > "=r,r,r,w,*f,*f,*r,*m") > (match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))] > "!TARGET_64BIT > && (register_operand (operands[0], DImode) > || reg_or_0_operand (operands[1], DImode))" > { return loongarch_output_move (operands[0], operands[1]); } > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > (REGNO > + (operands[0]))" > + [(const_int 0)] > + " > +{ > + loongarch_move_integer (operands[0], operands[0], INTVAL > (operands[1])); > + DONE; > +} > + " > [(set_attr "move_type" > "move,const,load,store,mgtf,fpload,mftg,fpstore") > (set_attr "mode" "DI")]) > > -(define_insn "*movdi_64bit" > +(define_insn_and_split "*movdi_64bit" > [(set (match_operand:DI 0 "nonimmediate_operand" > "=r,r,r,w,*f,*f,*r,*m") > (match_operand:DI 1 "move_operand" > "r,Yd,w,rJ,*r*J,*m,*f,*f"))] > "TARGET_64BIT > && (register_operand (operands[0], DImode) > || reg_or_0_operand (operands[1], DImode))" > { return loongarch_output_move (operands[0], operands[1]); } > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > (REGNO > + (operands[0]))" > + [(const_int 0)] > + " > +{ > + loongarch_move_integer (operands[0], operands[0], INTVAL > (operands[1])); > + DONE; > +} > + " > [(set_attr "move_type" > "move,const,load,store,mgtf,fpload,mftg,fpstore") > (set_attr "mode" "DI")]) > > @@ -1749,12 +1767,21 @@ (define_expand "movsi" > DONE; > }) > > -(define_insn "*movsi_internal" > +(define_insn_and_split "*movsi_internal" > [(set (match_operand:SI 0 "nonimmediate_operand" > "=r,r,r,w,*f,*f,*r,*m,*r,*z") > (match_operand:SI 1 "move_operand" > "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))] > "(register_operand (operands[0], SImode) > || reg_or_0_operand (operands[1], SImode))" > { return loongarch_output_move (operands[0], operands[1]); } > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > (REGNO > + (operands[0]))" > + [(const_int 0)] > + " > +{ > + loongarch_move_integer (operands[0], operands[0], INTVAL > (operands[1])); > + DONE; > +} > + " > [(set_attr "move_type" > "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") > (set_attr "mode" "SI")]) > > @@ -1774,12 +1801,21 @@ (define_expand "movhi" > DONE; > }) > > -(define_insn "*movhi_internal" > +(define_insn_and_split "*movhi_internal" > [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,k") > (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))] > "(register_operand (operands[0], HImode) > || reg_or_0_operand (operands[1], HImode))" > { return loongarch_output_move (operands[0], operands[1]); } > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > (REGNO > + (operands[0]))" > + [(const_int 0)] > + " > +{ > + loongarch_move_integer (operands[0], operands[0], INTVAL > (operands[1])); > + DONE; > +} > + " > [(set_attr "move_type" "move,const,const,load,store,load,store") > (set_attr "mode" "HI")]) > > diff --git a/gcc/config/loongarch/predicates.md > b/gcc/config/loongarch/predicates.md > index 8bd0c1376c9..58c3dc2261c 100644 > --- a/gcc/config/loongarch/predicates.md > +++ b/gcc/config/loongarch/predicates.md > @@ -226,7 +226,7 @@ (define_predicate "move_operand" > switch (GET_CODE (op)) > { > case CONST_INT: > - return !splittable_const_int_operand (op, mode); > + return true; > > case CONST: > case SYMBOL_REF: > diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c > b/gcc/testsuite/gcc.target/loongarch/imm-load.c > new file mode 100644 > index 00000000000..c04ca33996f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */ > + > +long int > +test (void) > +{ > + return 0x1234567890abcdef; > +} > +/* { dg-final { scan-rtl-dump-times "scanning new insn with uid" 6 > "split1" } } */ > + > diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c > b/gcc/testsuite/gcc.target/loongarch/imm-load1.c > new file mode 100644 > index 00000000000..2ff02971239 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mabi=lp64d -O2" } */ > +/* { dg-final { scan-assembler > "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ > + > + > +extern long long b[10]; > +static inline long long > +repeat_bytes (void) > +{ > + long long r = 0x0101010101010101; > + > + return r; > +} > + > +static inline long long > +highbit_mask (long long m) > +{ > + return m & repeat_bytes (); > +} > + > +void test(long long *a) > +{ > + for (int i = 0; i < 10; i++) > + b[i] = highbit_mask (a[i]); > + > +} -- Xi Ruoyao <xry...@xry111.site> School of Aerospace Science and Technology, Xidian University