Hi,

This patch fixes up immediate generation for the AArch64 backend, allowing RTL optimizers such as CSE and loop-invariant hoisting to work more effectively with immediates. I also took the opportunity to rework this so that the same logic can be used in the cost calculations. This patch only deals with numerical constants; handling symbolic constants will be the subject of another patch. There has been some talk about restructuring immediate generation with a trie, but I'm going to leave that for another time.
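To make the motivation concrete, here is the flavour of code this helps (an illustrative example written for this mail, not the testcase from the PR):

    /* The 64-bit constant below needs a multi-insn mov/movk sequence.
       Keeping it as a single (set (reg) (const_int ...)) until after
       the RTL optimizers have run lets CSE and invariant motion treat
       the materialisation as one unit instead of seeing the pieces.  */
    void
    scale (unsigned long long *p, int n)
    {
      for (int i = 0; i < n; i++)
        p[i] ^= 0x1234567887654321ull;
    }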
This depends on another patch that James is working on, which fixes up bsl code generation with float modes; that was discovered to be broken and caused regressions when testing this change. I worked around those failures by pulling in a bsl patch that restructures the floating-point versions with an unspec, and found no other issues with this patch. The output now generated matches the expected output in the PR. I also looked at the output for a number of other benchmarks and it makes sense. Tested cross with aarch64-none-elf plus the BSL patch with no regressions. Bootstrapped and regression tested on aarch64-none-linux-gnu.

Ok for trunk once James's BSL patch is committed?

Ramana

<DATE>  Ramana Radhakrishnan  <ramana.radhakrish...@arm.com>

	PR target/63724
	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split
	out numerical immediate handling to...
	(aarch64_internal_mov_immediate): ...this.  New.
	(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
	(aarch64_mov_operand_p): Relax predicate.
	* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand
	CONST_INTs.
	(*movsi_aarch64): Turn into define_insn_and_split and add a new
	alternative for 'n'.
	(*movdi_aarch64): Likewise.
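To summarise the shape of the rework before the patch itself: the emission and costing paths now share a single walk, controlled by a generate flag. Below is a minimal standalone sketch of the counting side, covering only the fallback 16-bits-at-a-time case (the helper name is made up for this mail; the real routine also tries the cheaper bitmask and arithmetic forms first):

    /* Count the MOV/MOVK instructions of the simple_sequence fallback:
       one MOV for the first non-zero 16-bit quarter, then one MOVK per
       remaining non-zero quarter.  */
    static int
    count_simple_mov_sequence (unsigned long long val)
    {
      int num_insns = 0;

      for (int i = 0; i < 64; i += 16)
        if ((val >> i) & 0xffff)
          num_insns++;

      /* Zero still takes one MOV.  */
      return num_insns ? num_insns : 1;
    }

aarch64_rtx_costs feeds the count returned with generate == false straight into COSTS_N_INSNS, so the cost estimate can no longer drift out of sync with what the expander actually emits.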
commit 34392753bd7f1481eff6ff86e055981618a3d06e
Author: Ramana Radhakrishnan <ramana.radhakrish...@arm.com>
Date:   Thu Nov 6 16:08:27 2014 +0000

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 736ad90..20cbb2d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1046,8 +1046,8 @@ aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
   return plus_constant (mode, reg, offset);
 }
 
-void
-aarch64_expand_mov_immediate (rtx dest, rtx imm)
+static int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate)
 {
   machine_mode mode = GET_MODE (dest);
   unsigned HOST_WIDE_INT mask;
@@ -1057,85 +1057,14 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
-
-  gcc_assert (mode == SImode || mode == DImode);
-
-  /* Check on what type of symbol it is.  */
-  if (GET_CODE (imm) == SYMBOL_REF
-      || GET_CODE (imm) == LABEL_REF
-      || GET_CODE (imm) == CONST)
-    {
-      rtx mem, base, offset;
-      enum aarch64_symbol_type sty;
-
-      /* If we have (const (plus symbol offset)), separate out the offset
-         before we start classifying the symbol.  */
-      split_const (imm, &base, &offset);
-
-      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
-      switch (sty)
-        {
-        case SYMBOL_FORCE_TO_MEM:
-          if (offset != const0_rtx
-              && targetm.cannot_force_const_mem (mode, imm))
-            {
-              gcc_assert (can_create_pseudo_p ());
-              base = aarch64_force_temporary (mode, dest, base);
-              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-              aarch64_emit_move (dest, base);
-              return;
-            }
-          mem = force_const_mem (ptr_mode, imm);
-          gcc_assert (mem);
-          if (mode != ptr_mode)
-            mem = gen_rtx_ZERO_EXTEND (mode, mem);
-          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-          return;
-
-        case SYMBOL_SMALL_TLSGD:
-        case SYMBOL_SMALL_TLSDESC:
-        case SYMBOL_SMALL_GOTTPREL:
-        case SYMBOL_SMALL_GOT:
-        case SYMBOL_TINY_GOT:
-          if (offset != const0_rtx)
-            {
-              gcc_assert(can_create_pseudo_p ());
-              base = aarch64_force_temporary (mode, dest, base);
-              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-              aarch64_emit_move (dest, base);
-              return;
-            }
-          /* FALLTHRU */
-
-        case SYMBOL_SMALL_TPREL:
-        case SYMBOL_SMALL_ABSOLUTE:
-        case SYMBOL_TINY_ABSOLUTE:
-          aarch64_load_symref_appropriately (dest, imm, sty);
-          return;
-
-        default:
-          gcc_unreachable ();
-        }
-    }
+  int num_insns = 0;
 
   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
     {
-      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      return;
-    }
-
-  if (!CONST_INT_P (imm))
-    {
-      if (GET_CODE (imm) == HIGH)
+      if (generate)
         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      else
-        {
-          rtx mem = force_const_mem (mode, imm);
-          gcc_assert (mem);
-          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-        }
-
-      return;
+      num_insns++;
+      return num_insns;
     }
 
   if (mode == SImode)
@@ -1143,10 +1072,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
       /* We know we can't do this in 1 insn, and we must be able to do it
          in two; so don't mess around looking for sequences that don't buy
          us anything.  */
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
-      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-                                 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-      return;
+      if (generate)
+        {
+          emit_insn (gen_rtx_SET (VOIDmode, dest,
+                                  GEN_INT (INTVAL (imm) & 0xffff)));
+          emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+                                     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+        }
+      num_insns += 2;
+      return num_insns;
     }
 
   /* Remaining cases are all for DImode.  */
@@ -1176,11 +1110,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-                                 GEN_INT ((val >> first_not_ffff_match)
-                                          & 0xffff)));
-      return;
+      if (generate)
+        {
+          emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+          emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+                                     GEN_INT ((val >> first_not_ffff_match)
+                                              & 0xffff)));
+        }
+      num_insns += 2;
+      return num_insns;
     }
 
   if (zero_match == 2)
@@ -1195,40 +1133,57 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
         {
           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-          emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
-          emit_insn (gen_adddi3 (dest, subtarget,
-                                 GEN_INT (val - (val & mask))));
-          return;
+          if (generate)
+            {
+              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                      GEN_INT (val & mask)));
+              emit_insn (gen_adddi3 (dest, subtarget,
+                                     GEN_INT (val - (val & mask))));
+            }
+          num_insns += 2;
+          return num_insns;
         }
       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
         {
           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-                                  GEN_INT ((val + comp) & mask)));
-          emit_insn (gen_adddi3 (dest, subtarget,
-                                 GEN_INT (val - ((val + comp) & mask))));
-          return;
+          if (generate)
+            {
+              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                      GEN_INT ((val + comp) & mask)));
+              emit_insn (gen_adddi3 (dest, subtarget,
+                                     GEN_INT (val - ((val + comp) & mask))));
+            }
+          num_insns += 2;
+          return num_insns;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-                                  GEN_INT ((val - comp) | ~mask)));
-          emit_insn (gen_adddi3 (dest, subtarget,
-                                 GEN_INT (val - ((val - comp) | ~mask))));
-          return;
+          if (generate)
+            {
+              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                      GEN_INT ((val - comp) | ~mask)));
+              emit_insn (gen_adddi3 (dest, subtarget,
+                                     GEN_INT (val - ((val - comp) | ~mask))));
+            }
+          num_insns += 2;
+          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-                                  GEN_INT (val | ~mask)));
-          emit_insn (gen_adddi3 (dest, subtarget,
-                                 GEN_INT (val - (val | ~mask))));
-          return;
+          if (generate)
+            {
+              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                      GEN_INT (val | ~mask)));
+              emit_insn (gen_adddi3 (dest, subtarget,
+                                     GEN_INT (val - (val | ~mask))));
+            }
+          num_insns += 2;
+          return num_insns;
        }
     }
 
@@ -1243,22 +1198,30 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-                                  GEN_INT (aarch64_bitmasks[i])));
-          emit_insn (gen_adddi3 (dest, subtarget,
-                                 GEN_INT (val - aarch64_bitmasks[i])));
-          return;
+          if (generate)
+            {
+              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                      GEN_INT (aarch64_bitmasks[i])));
+              emit_insn (gen_adddi3 (dest, subtarget,
+                                     GEN_INT (val - aarch64_bitmasks[i])));
+            }
+          num_insns += 2;
+          return num_insns;
        }
 
      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
-              emit_insn (gen_rtx_SET (VOIDmode, dest,
-                                      GEN_INT (aarch64_bitmasks[i])));
-              emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-                                         GEN_INT ((val >> j) & 0xffff)));
-              return;
+              if (generate)
+                {
+                  emit_insn (gen_rtx_SET (VOIDmode, dest,
+                                          GEN_INT (aarch64_bitmasks[i])));
+                  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
+                                             GEN_INT ((val >> j) & 0xffff)));
+                }
+              num_insns += 2;
+              return num_insns;
            }
        }
     }
@@ -1274,11 +1237,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
          if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
            {
              subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-                                      GEN_INT (aarch64_bitmasks[i])));
-              emit_insn (gen_iordi3 (dest, subtarget,
-                                     GEN_INT (aarch64_bitmasks[j])));
-              return;
+              if (generate)
+                {
+                  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                          GEN_INT (aarch64_bitmasks[i])));
+                  emit_insn (gen_iordi3 (dest, subtarget,
+                                         GEN_INT (aarch64_bitmasks[j])));
+                }
+              num_insns += 2;
+              return num_insns;
            }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
@@ -1290,11 +1257,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
            {
              subtarget = subtargets ? gen_reg_rtx (mode) : dest;
 
-              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-                                      GEN_INT (aarch64_bitmasks[j])));
-              emit_insn (gen_anddi3 (dest, subtarget,
-                                     GEN_INT (aarch64_bitmasks[i])));
-              return;
+              if (generate)
+                {
+                  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+                                          GEN_INT (aarch64_bitmasks[j])));
+                  emit_insn (gen_anddi3 (dest, subtarget,
+                                         GEN_INT (aarch64_bitmasks[i])));
+                }
+              num_insns += 2;
+              return num_insns;
            }
        }
     }
@@ -1303,18 +1274,24 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
-      emit_insn (gen_rtx_SET (VOIDmode, dest,
-                              GEN_INT (val | mask | 0xffffffff00000000ull)));
+      if (generate)
+        emit_insn (gen_rtx_SET (VOIDmode, dest,
+                                GEN_INT (val | mask | 0xffffffff00000000ull)));
+      num_insns ++;
 
      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
           i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
-            emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-                                       GEN_INT ((val >> i) & 0xffff)));
+            {
+              if (generate)
+                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+                                           GEN_INT ((val >> i) & 0xffff)));
+              num_insns ++;
+            }
        }
-      return;
+      return num_insns;
     }
 
  simple_sequence:
@@ -1326,15 +1303,106 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
        {
          if (first)
            {
-              emit_insn (gen_rtx_SET (VOIDmode, dest,
-                                      GEN_INT (val & mask)));
+              if (generate)
+                emit_insn (gen_rtx_SET (VOIDmode, dest,
+                                        GEN_INT (val & mask)));
+              num_insns ++;
              first = false;
            }
          else
-            emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-                                       GEN_INT ((val >> i) & 0xffff)));
+            {
+              if (generate)
+                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+                                           GEN_INT ((val >> i) & 0xffff)));
+              num_insns ++;
+            }
        }
     }
+
+  return num_insns;
+}
+
+
+void
+aarch64_expand_mov_immediate (rtx dest, rtx imm)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode == SImode || mode == DImode);
+
+  /* Check on what type of symbol it is.  */
+  if (GET_CODE (imm) == SYMBOL_REF
+      || GET_CODE (imm) == LABEL_REF
+      || GET_CODE (imm) == CONST)
+    {
+      rtx mem, base, offset;
+      enum aarch64_symbol_type sty;
+
+      /* If we have (const (plus symbol offset)), separate out the offset
+         before we start classifying the symbol.  */
+      split_const (imm, &base, &offset);
+
+      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
+      switch (sty)
+        {
+        case SYMBOL_FORCE_TO_MEM:
+          if (offset != const0_rtx
+              && targetm.cannot_force_const_mem (mode, imm))
+            {
+              gcc_assert (can_create_pseudo_p ());
+              base = aarch64_force_temporary (mode, dest, base);
+              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+              aarch64_emit_move (dest, base);
+              return;
+            }
+          mem = force_const_mem (ptr_mode, imm);
+          gcc_assert (mem);
+          if (mode != ptr_mode)
+            mem = gen_rtx_ZERO_EXTEND (mode, mem);
+          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+          return;
+
+        case SYMBOL_SMALL_TLSGD:
+        case SYMBOL_SMALL_TLSDESC:
+        case SYMBOL_SMALL_GOTTPREL:
+        case SYMBOL_SMALL_GOT:
+        case SYMBOL_TINY_GOT:
+          if (offset != const0_rtx)
+            {
+              gcc_assert(can_create_pseudo_p ());
+              base = aarch64_force_temporary (mode, dest, base);
+              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+              aarch64_emit_move (dest, base);
+              return;
+            }
+          /* FALLTHRU */
+
+        case SYMBOL_SMALL_TPREL:
+        case SYMBOL_SMALL_ABSOLUTE:
+        case SYMBOL_TINY_ABSOLUTE:
+          aarch64_load_symref_appropriately (dest, imm, sty);
+          return;
+
+        default:
+          gcc_unreachable ();
+        }
+    }
+
+  if (!CONST_INT_P (imm))
+    {
+      if (GET_CODE (imm) == HIGH)
+        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
+      else
+        {
+          rtx mem = force_const_mem (mode, imm);
+          gcc_assert (mem);
+          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+        }
+
+      return;
+    }
+
+  aarch64_internal_mov_immediate (dest, imm, true);
 }
 
 static bool
@@ -5234,9 +5302,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
          proportionally expensive to the number of instructions
          required to build that constant.  This is true whether we
          are compiling for SPEED or otherwise.  */
-      *cost = COSTS_N_INSNS (aarch64_build_constant (0,
-                                                     INTVAL (x),
-                                                     false));
+      *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
+                             (gen_rtx_REG (mode, 0), x, false));
     }
 
   return true;
@@ -8035,7 +8102,7 @@ aarch64_mov_operand_p (rtx x,
       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
     return true;
 
-  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
+  if (CONST_INT_P (x))
     return true;
 
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 17570ba..142e8b1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -746,17 +746,20 @@
     if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-    if (CONSTANT_P (operands[1]))
-      {
-        aarch64_expand_mov_immediate (operands[0], operands[1]);
-        DONE;
-      }
+    /* FIXME: RR we still need to fix up what we are doing with
+       symbol_refs and other types of constants.  */
+    if (CONSTANT_P (operands[1])
+        && !CONST_INT_P (operands[1]))
+      {
+        aarch64_expand_mov_immediate (operands[0], operands[1]);
+        DONE;
+      }
   "
 )
 
-(define_insn "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w")
-        (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
+(define_insn_and_split "*movsi_aarch64"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w")
+        (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -764,6 +767,7 @@
    mov\\t%w0, %w1
    mov\\t%w0, %w1
    mov\\t%w0, %1
+   #
    ldr\\t%w0, %1
    ldr\\t%s0, %1
    str\\t%w1, %0
@@ -773,14 +777,20 @@
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)"
+  [(const_int 0)]
+  "{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )
 
-(define_insn "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
-        (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+(define_insn_and_split "*movdi_aarch64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w")
+        (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
   "(register_operand (operands[0], DImode)
    || aarch64_reg_or_zero (operands[1], DImode))"
  "@
@@ -788,6 +798,7 @@
    mov\\t%0, %x1
    mov\\t%x0, %1
    mov\\t%x0, %1
+   #
    ldr\\t%x0, %1
    ldr\\t%d0, %1
    str\\t%x1, %0
@@ -798,10 +809,16 @@
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    movi\\t%d0, %1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+  "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))"
+  [(const_int 0)]
+  "{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm<mode>"