Given the testcase below, the instruction which loads the function address from the GOT table is not hoisted out of the loop, although it should be, as the value is fixed at runtime.
The problem is that we haven't marked those GOT-related mems as READONLY, so the RTL loop2_invariant pass makes a conservative decision in check_maybe_invariant not to hoist them. int bar (int); int foo (int a, int bound) { int i = 0; int sum = 0; for (i; i < bound; i++) sum = bar (sum); return sum; } This patch marks the mem in the PIC-related patterns as READONLY and NOTRAP; more cleanup may be needed for several other patterns. 2015-07-06 Jiong Wang <jiong.w...@arm.com> gcc/ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Mark mem as READONLY and NOTRAP for PIC symbol. gcc/testsuite/ * gcc.target/aarch64/got_mem_hoist.c: New test. -- Regards, Jiong
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 4522fc2..4bbc049 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -915,6 +915,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, { machine_mode mode = GET_MODE (dest); rtx gp_rtx = pic_offset_table_rtx; + rtx insn; + rtx mem; /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach here before rtl expand. Tree IVOPT will generate rtl pattern to @@ -958,16 +960,27 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, if (mode == ptr_mode) { if (mode == DImode) - emit_insn (gen_ldr_got_small_28k_di (dest, gp_rtx, imm)); + insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm); else - emit_insn (gen_ldr_got_small_28k_si (dest, gp_rtx, imm)); + insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm); + + mem = XVECEXP (SET_SRC (insn), 0, 0); } else { gcc_assert (mode == Pmode); - emit_insn (gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm)); + + insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm); + mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); } + /* The operand is expected to be MEM. Whenever the related insn + pattern changed, above code which calculate mem should be + updated. */ + gcc_assert (GET_CODE (mem) == MEM); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + emit_insn (insn); return; } @@ -980,6 +993,9 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, DImode if dest is dereferenced to access the memeory. This is why we have to handle three different ldr_got_small patterns here (two patterns for ILP32). 
*/ + + rtx insn; + rtx mem; rtx tmp_reg = dest; machine_mode mode = GET_MODE (dest); @@ -990,16 +1006,24 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, if (mode == ptr_mode) { if (mode == DImode) - emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm)); + insn = gen_ldr_got_small_di (dest, tmp_reg, imm); else - emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm)); + insn = gen_ldr_got_small_si (dest, tmp_reg, imm); + + mem = XVECEXP (SET_SRC (insn), 0, 0); } else { gcc_assert (mode == Pmode); - emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm)); + + insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm); + mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); } + gcc_assert (GET_CODE (mem) == MEM); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + emit_insn (insn); return; } diff --git a/gcc/testsuite/gcc.target/aarch64/got_mem_hoist.c b/gcc/testsuite/gcc.target/aarch64/got_mem_hoist.c new file mode 100644 index 0000000..6d29718 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/got_mem_hoist.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fpic -fdump-rtl-loop2_invariant" } */ + +int bar (int); +int cal (void *); + +int +foo (int a, int bound) +{ + int i = 0; + int sum = 0; + + for (i; i < bound; i++) + sum = cal (bar); + + return sum; +} + +/* The insn which loads function address from GOT table should be moved out + of the loop. */ +/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */