The problem with peephole2 is that it uses a naive sliding-window algorithm and misses many cases. For example:
float a[10000]; float t() { return a[0] + a[8000]; } is compiled to: la.local $r13,a la.local $r12,a+32768 fld.s $f1,$r13,0 fld.s $f0,$r12,-768 fadd.s $f0,$f1,$f0 by trunk. But as we've explained in r14-4851, the following would be better with -mexplicit-relocs=auto: pcalau12i $r13,%pc_hi20(a) pcalau12i $r12,%pc_hi20(a+32000) fld.s $f1,$r13,%pc_lo12(a) fld.s $f0,$r12,%pc_lo12(a+32000) fadd.s $f0,$f1,$f0 However, the sliding-window algorithm just won't detect the pcalau12i/fld pairs to be optimized. Using a define_insn_and_split in the combine pass works around the issue. gcc/ChangeLog: * config/loongarch/loongarch.md (simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>): New define_insn_and_split. (simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>): Likewise. (simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>): Likewise. (simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>): Likewise. (simple_store<ST_ANY:mode><P:mode>): Likewise. (simple_store_off<ST_ANY:mode><P:mode>): Likewise. (define_peephole2): Remove la.local/[f]ld peepholes. gcc/testsuite/ChangeLog: * gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c: New test. --- Bootstrapped & regtested on loongarch64-linux-gnu. Ok for trunk? gcc/config/loongarch/loongarch.md | 165 +++++++++--------- ...explicit-relocs-auto-single-load-store-2.c | 11 ++ 2 files changed, 98 insertions(+), 78 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 7b26d15aa4e..4009de408fb 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -4033,101 +4033,110 @@ (define_insn "loongarch_crcc_w_<size>_w" ;; ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with ;; 3 instructions). 
-(define_peephole2 - [(set (match_operand:P 0 "register_operand") - (match_operand:P 1 "symbolic_pcrel_operand")) - (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") - (mem:LD_AT_LEAST_32_BIT (match_dup 0)))] - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ - && (peep2_reg_dead_p (2, operands[0]) \ - || REGNO (operands[0]) == REGNO (operands[2]))" - [(set (match_dup 2) - (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] +(define_insn_and_split "simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>" + [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") + (mem:LD_AT_LEAST_32_BIT + (match_operand:P 1 "symbolic_pcrel_operand" "")))] + "loongarch_pre_reload_split () \ + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" + "#" + "&& true" + [(set (match_dup 0) + (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))] { - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); }) -(define_peephole2 - [(set (match_operand:P 0 "register_operand") - (match_operand:P 1 "symbolic_pcrel_operand")) - (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") - (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) - (match_operand 3 "const_int_operand"))))] - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ - && (peep2_reg_dead_p (2, operands[0]) \ - || REGNO (operands[0]) == REGNO (operands[2]))" - [(set (match_dup 2) - (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] +(define_insn_and_split "simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>" + [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") + (mem:LD_AT_LEAST_32_BIT + (plus (match_operand:P 1 "symbolic_pcrel_operand" "") + (match_operand 2 "const_int_operand" ""))))] +
"loongarch_pre_reload_split () \ + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" + "#" + "&& true" + [(set (match_dup 0) + (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))] { - operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); + HOST_WIDE_INT offset = INTVAL (operands[2]); + operands[2] = gen_reg_rtx (Pmode); + operands[1] = plus_constant (Pmode, operands[1], offset); + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); }) -(define_peephole2 - [(set (match_operand:P 0 "register_operand") - (match_operand:P 1 "symbolic_pcrel_operand")) - (set (match_operand:GPR 2 "register_operand") - (any_extend:GPR (mem:SUBDI (match_dup 0))))] - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ - && (peep2_reg_dead_p (2, operands[0]) \ - || REGNO (operands[0]) == REGNO (operands[2]))" - [(set (match_dup 2) - (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) - (match_dup 1)))))] +(define_insn_and_split "simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_extend:GPR + (mem:SUBDI (match_operand:P 1 "symbolic_pcrel_operand" ""))))] + "loongarch_pre_reload_split () \ + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" + "#" + "&& true" + [(set (match_dup 0) + (any_extend:GPR + (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))] { - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); }) -(define_peephole2 - [(set (match_operand:P 0 "register_operand") - (match_operand:P 1 "symbolic_pcrel_operand")) - (set (match_operand:GPR 2 "register_operand") +(define_insn_and_split + "simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>" + [(set
(match_operand:GPR 0 "register_operand" "=r") + (any_extend:GPR + (mem:SUBDI + (plus (match_operand:P 1 "symbolic_pcrel_operand" "") + (match_operand 2 "const_int_operand" "")))))] + "loongarch_pre_reload_split () \ + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" + "#" + "&& true" + [(set (match_dup 0) (any_extend:GPR - (mem:SUBDI (plus (match_dup 0) - (match_operand 3 "const_int_operand")))))] - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ - && (peep2_reg_dead_p (2, operands[0]) \ - || REGNO (operands[0]) == REGNO (operands[2]))" - [(set (match_dup 2) - (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) - (match_dup 1)))))] + (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))] { - operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); + HOST_WIDE_INT offset = INTVAL (operands[2]); + operands[2] = gen_reg_rtx (Pmode); + operands[1] = plus_constant (Pmode, operands[1], offset); + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); }) -(define_peephole2 - [(set (match_operand:P 0 "register_operand") - (match_operand:P 1 "symbolic_pcrel_operand")) - (set (mem:ST_ANY (match_dup 0)) - (match_operand:ST_ANY 2 "register_operand"))] - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ - && (peep2_reg_dead_p (2, operands[0])) \ - && REGNO (operands[0]) != REGNO (operands[2])" - [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] +(define_insn_and_split "simple_store<ST_ANY:mode><P:mode>" + [(set (mem:ST_ANY (match_operand:P 0 "symbolic_pcrel_operand")) + (match_operand:ST_ANY 1 "register_operand" "r,f"))] + "loongarch_pre_reload_split () \ + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" + "#" + "&& true" + [(set (mem:ST_ANY (lo_sum:P
(match_dup 2) (match_dup 0))) (match_dup 1))] { - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[0])); }) -(define_peephole2 - [(set (match_operand:P 0 "register_operand") - (match_operand:P 1 "symbolic_pcrel_operand")) - (set (mem:ST_ANY (plus (match_dup 0) - (match_operand 3 "const_int_operand"))) - (match_operand:ST_ANY 2 "register_operand"))] - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ - && (peep2_reg_dead_p (2, operands[0])) \ - && REGNO (operands[0]) != REGNO (operands[2])" - [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] +(define_insn_and_split "simple_store_off<ST_ANY:mode><P:mode>" + [(set (mem:ST_ANY + (plus (match_operand:P 0 "symbolic_pcrel_operand" "") + (match_operand 1 "const_int_operand" ""))) + (match_operand:ST_ANY 2 "register_operand" "r,f"))] + "loongarch_pre_reload_split () \ + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" + "#" + "&& true" + [(set (mem:ST_ANY (lo_sum:P (match_dup 1) (match_dup 0))) (match_dup 2))] { - operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); + HOST_WIDE_INT offset = INTVAL (operands[1]); + operands[1] = gen_reg_rtx (Pmode); + operands[0] = plus_constant (Pmode, operands[0], offset); + emit_insn (gen_pcalau12i_gr<P:mode> (operands[1], operands[0])); }) ;; Synchronization instructions. 
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c new file mode 100644 index 00000000000..42cb966d1e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */ + +float a[8001]; +float +t (void) +{ + return a[0] + a[8000]; +} + +/* { dg-final { scan-assembler-not "la.local" } } */ -- 2.43.0