On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote: > *ping*
Ping*2 Thanks, James > On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote: > > > > Hi, > > > > There seems to be a partial misconception in the AArch64 backend that > > load1/load2 referred to the number of registers to load, rather than the > > number of words to load. This patch fixes that using the new "number of > > bytes" types added in the previous patch. > > > > That means using the load_16 and store_16 types that were defined in the > > previous patch for the first time in the AArch64 backend. To ensure > > continuity for scheduling models, I've just split this out from load_8. > > Please update your models if this is very wrong! > > > > Bootstrapped on aarch64-none-linux-gnu with no issue. > > > > OK? > > > > Thanks, > > James > > > > --- > > 2017-06-12 James Greenhalgh <james.greenha...@arm.com> > > > > * config/aarch64/aarch64.md (movdi_aarch64): Set load/store > > types correctly. > > (movti_aarch64): Likewise. > > (movdf_aarch64): Likewise. > > (movtf_aarch64): Likewise. > > (load_pairdi): Likewise. > > (store_pairdi): Likewise. > > (load_pairdf): Likewise. > > (store_pairdf): Likewise. > > (loadwb_pair<GPI:mode>_<P:mode>): Likewise. > > (storewb_pair<GPI:mode>_<P:mode>): Likewise. > > (ldr_got_small_<mode>): Likewise. > > (ldr_got_small_28k_<mode>): Likewise. > > (ldr_got_tiny): Likewise. > > * config/aarch64/iterators.md (ldst_sz): New. > > (ldpstp_sz): Likewise. > > * config/aarch64/thunderx.md (thunderx_storepair): Split store_8 > > to store_16. > > (thunderx_load): Split load_8 to load_16. > > * config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split > > load_8 to load_16. > > (thunderx2t99_storepair_basic): Split store_8 to store_16. > > * config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16. > > (xgene1_store_pair): Split store_8 to store_16. 
> > > > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > > index 11295a6..a1385e3 100644 > > --- a/gcc/config/aarch64/aarch64.md > > +++ b/gcc/config/aarch64/aarch64.md > > @@ -981,7 +981,7 @@ > > DONE; > > }" > > [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\ > > - load_4,load_4,store_4,store_4,\ > > + load_8,load_8,store_8,store_8,\ > > adr,adr,f_mcr,f_mrc,fmov,neon_move") > > (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") > > (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] > > @@ -1026,7 +1026,8 @@ > > ldr\\t%q0, %1 > > str\\t%q1, %0" > > [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ > > - load_8,store_8,store_8,f_loadd,f_stored") > > + load_16,store_16,store_16,\ > > + load_16,store_16") > > (set_attr "length" "8,8,8,4,4,4,4,4,4") > > (set_attr "simd" "*,*,*,yes,*,*,*,*,*") > > (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")] > > @@ -1121,7 +1122,7 @@ > > str\\t%x1, %0 > > mov\\t%x0, %x1" > > [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\ > > - f_loadd,f_stored,load_4,store_4,mov_reg") > > + f_loadd,f_stored,load_8,store_8,mov_reg") > > (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] > > ) > > > > @@ -1145,7 +1146,7 @@ > > stp\\t%1, %H1, %0 > > stp\\txzr, xzr, %0" > > [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ > > - f_loadd,f_stored,load_8,store_8,store_8") > > + f_loadd,f_stored,load_16,store_16,store_16") > > (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") > > (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")] > > ) > > @@ -1209,7 +1210,7 @@ > > "@ > > ldp\\t%x0, %x2, %1 > > ldp\\t%d0, %d2, %1" > > - [(set_attr "type" "load_8,neon_load1_2reg") > > + [(set_attr "type" "load_16,neon_load1_2reg") > > (set_attr "fp" "*,yes")] > > ) > > > > @@ -1244,7 +1245,7 @@ > > "@ > > stp\\t%x1, %x3, %0 > > stp\\t%d1, %d3, %0" > > - [(set_attr "type" "store_8,neon_store1_2reg") > > + [(set_attr "type" "store_16,neon_store1_2reg") > > (set_attr "fp" "*,yes")] > > ) > > > > @@ -1278,7 +1279,7 
@@ > > "@ > > ldp\\t%d0, %d2, %1 > > ldp\\t%x0, %x2, %1" > > - [(set_attr "type" "neon_load1_2reg,load_8") > > + [(set_attr "type" "neon_load1_2reg,load_16") > > (set_attr "fp" "yes,*")] > > ) > > > > @@ -1312,7 +1313,7 @@ > > "@ > > stp\\t%d1, %d3, %0 > > stp\\t%x1, %x3, %0" > > - [(set_attr "type" "neon_store1_2reg,store_8") > > + [(set_attr "type" "neon_store1_2reg,store_16") > > (set_attr "fp" "yes,*")] > > ) > > > > @@ -1330,7 +1331,7 @@ > > (match_operand:P 5 "const_int_operand" "n"))))])] > > "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" > > "ldp\\t%<w>2, %<w>3, [%1], %4" > > - [(set_attr "type" "load_8")] > > + [(set_attr "type" "load_<ldpstp_sz>")] > > ) > > > > (define_insn "loadwb_pair<GPF:mode>_<P:mode>" > > @@ -1363,7 +1364,7 @@ > > (match_operand:GPI 3 "register_operand" "r"))])] > > "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE > > (<GPI:MODE>mode)" > > "stp\\t%<w>2, %<w>3, [%0, %4]!" > > - [(set_attr "type" "store_8")] > > + [(set_attr "type" "store_<ldpstp_sz>")] > > ) > > > > (define_insn "storewb_pair<GPF:mode>_<P:mode>" > > @@ -5139,7 +5140,7 @@ > > UNSPEC_GOTSMALLPIC))] > > "" > > "ldr\\t%<w>0, [%1, #:got_lo12:%a2]" > > - [(set_attr "type" "load_4")] > > + [(set_attr "type" "load_<ldst_sz>")] > > ) > > > > (define_insn "ldr_got_small_sidi" > > @@ -5162,7 +5163,7 @@ > > UNSPEC_GOTSMALLPIC28K))] > > "" > > "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]" > > - [(set_attr "type" "load_4")] > > + [(set_attr "type" "load_<ldst_sz>")] > > ) > > > > (define_insn "ldr_got_small_28k_sidi" > > @@ -5183,7 +5184,7 @@ > > UNSPEC_GOTTINYPIC))] > > "" > > "ldr\\t%0, %L1" > > - [(set_attr "type" "load_4")] > > + [(set_attr "type" "load_8")] > > ) > > > > (define_insn "aarch64_load_tp_hard" > > diff --git a/gcc/config/aarch64/iterators.md > > b/gcc/config/aarch64/iterators.md > > index 43be7fd..a65c3aa 100644 > > --- a/gcc/config/aarch64/iterators.md > > +++ b/gcc/config/aarch64/iterators.md > > @@ -384,6 +384,11 @@ > > ;; 32-bit version and 
"%x0" in the 64-bit version. > > (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF > > "d")]) > > > > +;; The size of access, in bytes. > > +(define_mode_attr ldst_sz [(SI "4") (DI "8")]) > > +;; Likewise for load/store pair. > > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")]) > > + > > ;; For inequal width int to float conversion > > (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")]) > > (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")]) > > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md > > index c18da2f..84ac6cd 100644 > > --- a/gcc/config/aarch64/thunderx.md > > +++ b/gcc/config/aarch64/thunderx.md > > @@ -100,7 +100,7 @@ > > ;; Store pair are single issued > > (define_insn_reservation "thunderx_storepair" 1 > > (and (eq_attr "tune" "thunderx") > > - (eq_attr "type" "store_8")) > > + (eq_attr "type" "store_8,store_16")) > > "thunderx_pipe0 + thunderx_pipe1") > > > > ;; Prefetch are single issued > > @@ -112,7 +112,7 @@ > > ;; loads (and load pairs) from L1 take 3 cycles in pipe 0 > > (define_insn_reservation "thunderx_load" 3 > > (and (eq_attr "tune" "thunderx") > > - (eq_attr "type" "load_4, load_8")) > > + (eq_attr "type" "load_4, load_8, load_16")) > > "thunderx_pipe0") > > > > (define_insn_reservation "thunderx_brj" 1 > > diff --git a/gcc/config/aarch64/thunderx2t99.md > > b/gcc/config/aarch64/thunderx2t99.md > > index 632396f..4e39610 100644 > > --- a/gcc/config/aarch64/thunderx2t99.md > > +++ b/gcc/config/aarch64/thunderx2t99.md > > @@ -128,7 +128,7 @@ > > > > (define_insn_reservation "thunderx2t99_loadpair" 5 > > (and (eq_attr "tune" "thunderx2t99") > > - (eq_attr "type" "load_8")) > > + (eq_attr "type" "load_8,load_16")) > > "thunderx2t99_i012,thunderx2t99_ls01") > > > > (define_insn_reservation "thunderx2t99_store_basic" 1 > > @@ -138,7 +138,7 @@ > > > > (define_insn_reservation "thunderx2t99_storepair_basic" 1 > > (and (eq_attr "tune" "thunderx2t99") > > - (eq_attr "type" "store_8")) > > + (eq_attr 
"type" "store_8,store_16")) > > "thunderx2t99_ls01,thunderx2t99_sd") > > > > ;; FP data processing instructions. > > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md > > index 7e70408..0b457ee 100644 > > --- a/gcc/config/arm/xgene1.md > > +++ b/gcc/config/arm/xgene1.md > > @@ -92,12 +92,12 @@ > > > > (define_insn_reservation "xgene1_load_pair" 6 > > (and (eq_attr "tune" "xgene1") > > - (eq_attr "type" "load_8")) > > + (eq_attr "type" "load_8, load_16")) > > "xgene1_decodeIsolated") > > > > (define_insn_reservation "xgene1_store_pair" 2 > > (and (eq_attr "tune" "xgene1") > > - (eq_attr "type" "store_8")) > > + (eq_attr "type" "store_8, store_16")) > > "xgene1_decodeIsolated") > > > > (define_insn_reservation "xgene1_fp_load1" 10 >