Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns

James Greenhalgh Mon, 03 Jul 2017 03:48:07 -0700

On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> *ping*


Ping*2

Thanks,
James

> On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > 
> > Hi,
> > 
> > There seems to be a partial misconception in the AArch64 backend that
> > load1/load2 referred to the number of registers to load, rather than the
> > number of words to load. This patch fixes that using the new "number of
> > bytes" types added in the previous patch.
> > 
> > That means using the load_16 and store_16 types that were defined in the
> > previous patch for the first time in the AArch64 backend. To ensure
> > continuity for scheduling models, I've just split this out from load_8.
> > Please update your models if this is very wrong!
> > 
> > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > 
> > OK?
> > 
> > Thanks,
> > James
> > 
> > ---
> > 2017-06-12  James Greenhalgh  <[email protected]>
> > 
> >     * config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> >     types correctly.
> >     (movti_aarch64): Likewise.
> >     (movdf_aarch64): Likewise.
> >     (movtf_aarch64): Likewise.
> >     (load_pairdi): Likewise.
> >     (store_pairdi): Likewise.
> >     (load_pairdf): Likewise.
> >     (store_pairdf): Likewise.
> >     (loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> >     (storewb_pair<GPI:mode>_<P:mode>): Likewise.
> >     (ldr_got_small_<mode>): Likewise.
> >     (ldr_got_small_28k_<mode>): Likewise.
> >     (ldr_got_tiny): Likewise.
> >     * config/aarch64/iterators.md (ldst_sz): New.
> >     (ldpstp_sz): Likewise.
> >     * config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> >     to store_16.
> >     (thunderx_load): Split load_8 to load_16.
> >     * config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> >     load_8 to load_16.
> >     (thunderx2t99_storepair_basic): Split store_8 to store_16.
> >     * config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> >     (xgene1_store_pair): Split store_8 to store_16.
> > 
> 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 11295a6..a1385e3 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -981,7 +981,7 @@
> >         DONE;
> >      }"
> >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > -                     load_4,load_4,store_4,store_4,\
> > +                     load_8,load_8,store_8,store_8,\
> >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > @@ -1026,7 +1026,8 @@
> >     ldr\\t%q0, %1
> >     str\\t%q1, %0"
> >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > -                        load_8,store_8,store_8,f_loadd,f_stored")
> > +                        load_16,store_16,store_16,\
> > +                             load_16,store_16")
> >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > @@ -1121,7 +1122,7 @@
> >     str\\t%x1, %0
> >     mov\\t%x0, %x1"
> >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> >  )
> >  
> > @@ -1145,7 +1146,7 @@
> >     stp\\t%1, %H1, %0
> >     stp\\txzr, xzr, %0"
> >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > +                     f_loadd,f_stored,load_16,store_16,store_16")
> >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> >  )
> > @@ -1209,7 +1210,7 @@
> >    "@
> >     ldp\\t%x0, %x2, %1
> >     ldp\\t%d0, %d2, %1"
> > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > +  [(set_attr "type" "load_16,neon_load1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1244,7 +1245,7 @@
> >    "@
> >     stp\\t%x1, %x3, %0
> >     stp\\t%d1, %d3, %0"
> > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > +  [(set_attr "type" "store_16,neon_store1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1278,7 +1279,7 @@
> >    "@
> >     ldp\\t%d0, %d2, %1
> >     ldp\\t%x0, %x2, %1"
> > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > +  [(set_attr "type" "neon_load1_2reg,load_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1312,7 +1313,7 @@
> >    "@
> >     stp\\t%d1, %d3, %0
> >     stp\\t%x1, %x3, %0"
> > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > +  [(set_attr "type" "neon_store1_2reg,store_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1330,7 +1331,7 @@
> >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > -  [(set_attr "type" "load_8")]
> > +  [(set_attr "type" "load_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > @@ -1363,7 +1364,7 @@
> >            (match_operand:GPI 3 "register_operand" "r"))])]
> >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > -  [(set_attr "type" "store_8")]
> > +  [(set_attr "type" "store_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > @@ -5139,7 +5140,7 @@
> >                 UNSPEC_GOTSMALLPIC))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_sidi"
> > @@ -5162,7 +5163,7 @@
> >                 UNSPEC_GOTSMALLPIC28K))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_28k_sidi"
> > @@ -5183,7 +5184,7 @@
> >                UNSPEC_GOTTINYPIC))]
> >    ""
> >    "ldr\\t%0, %L1"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_8")]
> >  )
> >  
> >  (define_insn "aarch64_load_tp_hard"
> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > index 43be7fd..a65c3aa 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -384,6 +384,11 @@
> >  ;; 32-bit version and "%x0" in the 64-bit version.
> >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> >  
> > +;; The size of access, in bytes.
> > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > +;; Likewise for load/store pair.
> > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > +
> >  ;; For inequal width int to float conversion
> >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > index c18da2f..84ac6cd 100644
> > --- a/gcc/config/aarch64/thunderx.md
> > +++ b/gcc/config/aarch64/thunderx.md
> > @@ -100,7 +100,7 @@
> >  ;; Store pair are single issued
> >  (define_insn_reservation "thunderx_storepair" 1
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx_pipe0 + thunderx_pipe1")
> >  
> >  ;; Prefetch are single issued
> > @@ -112,7 +112,7 @@
> >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> >  (define_insn_reservation "thunderx_load" 3
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "load_4, load_8"))
> > +       (eq_attr "type" "load_4, load_8, load_16"))
> >    "thunderx_pipe0")
> >  
> >  (define_insn_reservation "thunderx_brj" 1
> > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > index 632396f..4e39610 100644
> > --- a/gcc/config/aarch64/thunderx2t99.md
> > +++ b/gcc/config/aarch64/thunderx2t99.md
> > @@ -128,7 +128,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_loadpair" 5
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8,load_16"))
> >    "thunderx2t99_i012,thunderx2t99_ls01")
> >  
> >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > @@ -138,7 +138,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx2t99_ls01,thunderx2t99_sd")
> >  
> >  ;; FP data processing instructions.
> > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > index 7e70408..0b457ee 100644
> > --- a/gcc/config/arm/xgene1.md
> > +++ b/gcc/config/arm/xgene1.md
> > @@ -92,12 +92,12 @@
> >  
> >  (define_insn_reservation "xgene1_load_pair" 6
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8, load_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_store_pair" 2
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8, store_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_fp_load1" 10
>

Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns

Reply via email to