Some recent work showed me that many of the latency values in the documentation I have for Niagara-4 were simply inaccurate. So I went through the instruction set and tried to determine the real values by hand using test programs.
In particular, the logical VIS operations, when working on 64-bit operands, can largely execute in 3 cycles instead of 11. 64-bit moves between float and integer registers can execute in 1 cycle. We also now properly represent the special third slot that stores can sometimes be scheduled into. Finally, we were emitting 'fsrc1' as the VIS move, which is quite expensive on Niagara-4 and later: 'fsrc2' executes in 1 cycle vs. 11 for 'fsrc1'. Committed to mainline. * config/sparc/sparc.md (type attribute): Add new types 'visl' (VIS logical operation), 'vismv' (VIS move), and 'pdistn'. Rename 'fgm_pdist' to 'pdist'. (*movsi_insn): Use vismv and visl. (*movdi_insn_sp64): Likewise. (*movsf_insn): Likewise. (*movdf_insn_sp64): Likewise. (*mov<VM32:mode>_insn): Likewise, use 'fsrc2s' instead of 'fsrc1s'. (*mov<VM64:mode>_insn_sp64): Likewise, use 'fsrc2' instead of 'fsrc1'. (*mov<VM64:mode>_insn_sp32): Likewise, use 'fsrc2' instead of 'fsrc1'. (VIS logical instructions): Mark as visl. (pdist_vis): Use 'pdist'. (pdistn<mode>_vis): Use 'pdistn'. * config/sparc/ultra1_2.md: Adjust for new VIS attribute types. * config/sparc/ultra3.md: Likewise. * config/sparc/niagara.md: Likewise. * config/sparc/niagara2.md: Likewise. * config/sparc/niagara4.md: Add cpu units "n4_slot2" and "n4_load_store" for special store scheduling. Use them in load and store reservations. Integer divide and multiply can only issue in slot-1. Represent 1-cycle VIS moves and 3-cycle VIS logic operations. 
--- gcc/ChangeLog | 25 ++++++++++++++++++++++ gcc/config/sparc/niagara.md | 2 +- gcc/config/sparc/niagara2.md | 4 ++-- gcc/config/sparc/niagara4.md | 49 ++++++++++++++++++++++++++++++++++++-------- gcc/config/sparc/sparc.md | 46 ++++++++++++++++++++--------------------- gcc/config/sparc/ultra1_2.md | 6 +++--- gcc/config/sparc/ultra3.md | 4 ++-- 7 files changed, 96 insertions(+), 40 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a7d550f..f428d07 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2012-10-09 David S. Miller <da...@davemloft.net> + + * config/sparc/sparc.md (type attribute): Add new types 'visl' + (VIS logical operation), 'vismv' (VIS move), and 'pdistn'. Rename + 'fgm_pdist' to 'pdist'. + (*movsi_insn): Use vismv and visl. + (*movdi_insn_sp64): Likewise. + (*movsf_insn): Likewise. + (*movdf_insn_sp64): Likewise. + (*mov<VM32:mode>_insn): Likewise, use 'fsrc2s' instead of 'fsrc1s'. + (*mov<VM64:mode>_insn_sp64): Likewise, use 'fsrc2s' instead of 'fsrc1s'. + (*mov<VM64:mode>_insn_sp32): Likewise, use 'fsrc2s' instead of 'fsrc1s'. + (VIS logical instructions): Mark as visl. + (pdist_vis): Use 'pdist'. + (pditsn<mode>_vis): Use 'pdistn'. + * config/sparc/ultra1_2.md: Adjust for new VIS attribute types. + * config/sparc/ultra3.md: Likewise. + * config/sparc/niagara.md: Likewise. + * config/sparc/niagara2.md: Likewise. + * config/sparc/niagara4.md: Add cpu units "n4_slot2" and + "n4_load_store" for special store scheduling. Use them in load + and store reservations. Integer divide and multiply can only + issue in slot-1. Represent 1-cycle VIS moves and 3-cycle VIS + logic operations. 
+ 2012-10-10 Dehao Chen <de...@google.com> * tree-eh.c (lower_try_finally_onedest): Set correct location for diff --git a/gcc/config/sparc/niagara.md b/gcc/config/sparc/niagara.md index c7db7b9..e471b84 100644 --- a/gcc/config/sparc/niagara.md +++ b/gcc/config/sparc/niagara.md @@ -114,5 +114,5 @@ */ (define_insn_reservation "niag_vis" 8 (and (eq_attr "cpu" "niagara") - (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_pdist,edge,edgen,gsr,array")) + (eq_attr "type" "fga,visl,vismv,fgm_pack,fgm_mul,pdist,edge,edgen,gsr,array")) "niag_pipe*8") diff --git a/gcc/config/sparc/niagara2.md b/gcc/config/sparc/niagara2.md index 59499aa..856fc01 100644 --- a/gcc/config/sparc/niagara2.md +++ b/gcc/config/sparc/niagara2.md @@ -111,10 +111,10 @@ (define_insn_reservation "niag2_vis" 6 (and (eq_attr "cpu" "niagara2") - (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_pdist,edge,edgen,array,gsr")) + (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,edge,edgen,array,gsr")) "niag2_pipe*6") (define_insn_reservation "niag3_vis" 9 (and (eq_attr "cpu" "niagara3") - (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_pdist,edge,edgen,array,gsr")) + (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,array,gsr")) "niag2_pipe*9") diff --git a/gcc/config/sparc/niagara4.md b/gcc/config/sparc/niagara4.md index f1f83b6..272c8ff 100644 --- a/gcc/config/sparc/niagara4.md +++ b/gcc/config/sparc/niagara4.md @@ -19,12 +19,14 @@ (define_automaton "niagara4_0") -(define_cpu_unit "n4_slot0,n4_slot1" "niagara4_0") -(define_reservation "n4_single_issue" "n4_slot0 + n4_slot1") +(define_cpu_unit "n4_slot0,n4_slot1,n4_slot2" "niagara4_0") +(define_reservation "n4_single_issue" "n4_slot0 + n4_slot1 + n4_slot2") + +(define_cpu_unit "n4_load_store" "niagara4_0") (define_insn_reservation "n4_single" 1 (and (eq_attr "cpu" "niagara4") - (eq_attr "type" "multi,savew,flushw,iflush,trap,gsr")) + (eq_attr "type" "multi,savew,flushw,iflush,trap")) "n4_single_issue") (define_insn_reservation "n4_integer" 1 @@ -35,22 
+37,22 @@ (define_insn_reservation "n4_imul" 12 (and (eq_attr "cpu" "niagara4") (eq_attr "type" "imul")) - "(n4_slot0 | n4_slot1), nothing*11") + "n4_slot1, nothing*11") (define_insn_reservation "n4_idiv" 35 (and (eq_attr "cpu" "niagara4") (eq_attr "type" "idiv")) - "(n4_slot0 | n4_slot1), nothing*34") + "n4_slot1, nothing*34") (define_insn_reservation "n4_load" 5 (and (eq_attr "cpu" "niagara4") (eq_attr "type" "load,fpload,sload")) - "n4_slot0, nothing*4") + "(n4_slot0 + n4_load_store), nothing*4") (define_insn_reservation "n4_store" 1 (and (eq_attr "cpu" "niagara4") (eq_attr "type" "store,fpstore")) - "n4_slot0") + "(n4_slot0 | n4_slot2) + n4_load_store") (define_insn_reservation "n4_cti" 2 (and (eq_attr "cpu" "niagara4") @@ -67,9 +69,38 @@ (eq_attr "type" "array,edge,edgen")) "n4_slot1, nothing*11") -(define_insn_reservation "n4_vis" 11 +(define_insn_reservation "n4_vis_move_1cycle" 1 + (and (eq_attr "cpu" "niagara4") + (and (eq_attr "type" "vismv") + (eq_attr "fptype" "double"))) + "n4_slot1") + +(define_insn_reservation "n4_vis_move_11cycle" 11 + (and (eq_attr "cpu" "niagara4") + (and (eq_attr "type" "vismv") + (eq_attr "fptype" "single"))) + "n4_slot1, nothing*10") + +(define_insn_reservation "n4_vis_logical" 3 + (and (eq_attr "cpu" "niagara4") + (and (eq_attr "type" "visl,pdistn") + (eq_attr "fptype" "double"))) + "n4_slot1, nothing*2") + +(define_insn_reservation "n4_vis_logical_11cycle" 11 + (and (eq_attr "cpu" "niagara4") + (and (eq_attr "type" "visl") + (eq_attr "fptype" "single"))) + "n4_slot1, nothing*10") + +(define_insn_reservation "n4_vis_fga" 11 + (and (eq_attr "cpu" "niagara4") + (eq_attr "type" "fga,gsr")) + "n4_slot1, nothing*10") + +(define_insn_reservation "n4_vis_fgm" 11 (and (eq_attr "cpu" "niagara4") - (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_pdist")) + (eq_attr "type" "fgm_pack,fgm_mul,pdist")) "n4_slot1, nothing*10") (define_insn_reservation "n4_fpdivs" 24 diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 
3e85461..f604f46 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -264,7 +264,7 @@ fpcmp, fpmul,fpdivs,fpdivd, fpsqrts,fpsqrtd, - fga,fgm_pack,fgm_mul,fgm_pdist,edge,edgen,gsr,array, + fga,visl,vismv,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,gsr,array, cmove, ialuX, multi,savew,flushw,iflush,trap" @@ -1457,7 +1457,7 @@ st\t%1, %0 fzeros\t%0 fones\t%0" - [(set_attr "type" "*,*,load,store,*,*,fpmove,fpload,fpstore,fga,fga") + [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl") (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")]) (define_insn "*movsi_lo_sum" @@ -1622,7 +1622,7 @@ std\t%1, %0 fzero\t%0 fone\t%0" - [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,fpmove,*,*,*,fpload,fpstore,fga,fga") + [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl") (set_attr "length" "*,2,*,*,2,2,2,2,*,*,2,2,*,2,2,2,*,*,*,*") (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double") (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")]) @@ -1645,7 +1645,7 @@ std\t%1, %0 fzero\t%0 fone\t%0" - [(set_attr "type" "*,*,load,store,*,*,fpmove,fpload,fpstore,fga,fga") + [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl") (set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double") (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")]) @@ -2251,7 +2251,7 @@ gcc_unreachable (); } } - [(set_attr "type" "fga,fga,fpmove,*,*,*,*,*,fpload,load,fpstore,store") + [(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store") (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")]) ;; The following 3 patterns build SFmode constants in integer registers. 
@@ -2323,7 +2323,7 @@ # # #" - [(set_attr "type" "fga,fga,fpmove,*,*,*,fpload,store,fpstore,load,store,*,*,*,*") + [(set_attr "type" "visl,visl,fpmove,*,*,*,fpload,store,fpstore,load,store,*,*,*,*") (set_attr "length" "*,*,*,2,2,2,*,*,*,*,*,2,2,2,2") (set_attr "fptype" "double,double,double,*,*,*,*,*,*,*,*,*,*,*,*") (set_attr "cpu_feature" "vis,vis,v9,fpunotv9,vis3,vis3,fpu,v9,fpu,*,*,fpu,*,*,fpu")]) @@ -2346,7 +2346,7 @@ ldx\t%1, %0 stx\t%r1, %0 #" - [(set_attr "type" "fga,fga,fpmove,*,*,load,store,*,load,store,*") + [(set_attr "type" "visl,visl,fpmove,vismv,vismv,load,store,*,load,store,*") (set_attr "length" "*,*,*,*,*,*,*,*,*,*,2") (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*") (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")]) @@ -7876,7 +7876,7 @@ "@ fzeros\t%0 fones\t%0 - fsrc1s\t%1, %0 + fsrc2s\t%1, %0 ld\t%1, %0 st\t%1, %0 st\t%r1, %0 @@ -7885,7 +7885,7 @@ mov\t%1, %0 movstouw\t%1, %0 movwtos\t%1, %0" - [(set_attr "type" "fga,fga,fga,fpload,fpstore,store,load,store,*,*,*") + [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,*,vismv,vismv") (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")]) (define_insn "*mov<VM64:mode>_insn_sp64" @@ -7898,7 +7898,7 @@ "@ fzero\t%0 fone\t%0 - fsrc1\t%1, %0 + fsrc2\t%1, %0 ldd\t%1, %0 std\t%1, %0 stx\t%r1, %0 @@ -7907,7 +7907,7 @@ movdtox\t%1, %0 movxtod\t%1, %0 mov\t%1, %0" - [(set_attr "type" "fga,fga,fga,fpload,fpstore,store,load,store,*,*,*") + [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,vismv,vismv,*") (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")]) (define_insn "*mov<VM64:mode>_insn_sp32" @@ -7920,7 +7920,7 @@ "@ fzero\t%0 fone\t%0 - fsrc1\t%1, %0 + fsrc2\t%1, %0 # # ldd\t%1, %0 @@ -7930,7 +7930,7 @@ std\t%1, %0 # #" - [(set_attr "type" "fga,fga,fga,*,*,fpload,fpstore,store,load,store,*,*") + [(set_attr "type" "visl,visl,vismv,*,*,fpload,fpstore,store,load,store,*,*") (set_attr "length" "*,*,*,2,2,*,*,*,*,*,2,2") 
(set_attr "cpu_feature" "vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*")]) @@ -8025,7 +8025,7 @@ (match_operand:VL 2 "register_operand" "<vconstr>")))] "TARGET_VIS" "f<vlinsn><vlsuf>\t%1, %2, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "<vfptype>")]) (define_insn "*not_<code><mode>3" @@ -8034,7 +8034,7 @@ (match_operand:VL 2 "register_operand" "<vconstr>"))))] "TARGET_VIS" "f<vlninsn><vlsuf>\t%1, %2, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "<vfptype>")]) ;; (ior (not (op1)) (not (op2))) is the canonical form of NAND. @@ -8044,7 +8044,7 @@ (not:VL (match_operand:VL 2 "register_operand" "<vconstr>"))))] "TARGET_VIS" "fnand<vlsuf>\t%1, %2, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "<vfptype>")]) (define_code_iterator vlnotop [ior and]) @@ -8055,7 +8055,7 @@ (match_operand:VL 2 "register_operand" "<vconstr>")))] "TARGET_VIS" "f<vlinsn>not1<vlsuf>\t%1, %2, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "<vfptype>")]) (define_insn "*<code>_not2<mode>_vis" @@ -8064,7 +8064,7 @@ (not:VL (match_operand:VL 2 "register_operand" "<vconstr>"))))] "TARGET_VIS" "f<vlinsn>not2<vlsuf>\t%1, %2, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "<vfptype>")]) (define_insn "one_cmpl<mode>2" @@ -8072,7 +8072,7 @@ (not:VL (match_operand:VL 1 "register_operand" "<vconstr>")))] "TARGET_VIS" "fnot1<vlsuf>\t%1, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "<vfptype>")]) ;; Hard to generate VIS instructions. We have builtins for these. 
@@ -8351,7 +8351,7 @@ UNSPEC_PDIST))] "TARGET_VIS" "pdist\t%1, %2, %0" - [(set_attr "type" "fgm_pdist") + [(set_attr "type" "pdist") (set_attr "fptype" "double")]) ;; Edge instructions produce condition codes equivalent to a 'subcc' @@ -8433,7 +8433,7 @@ UNSPEC_FCMP))] "TARGET_VIS" "fcmp<code><GCM:gcm_name>\t%1, %2, %0" - [(set_attr "type" "fga") + [(set_attr "type" "visl") (set_attr "fptype" "double")]) (define_expand "vcond<mode><mode>" @@ -8674,7 +8674,7 @@ UNSPEC_PDISTN))] "TARGET_VIS3" "pdistn\t%1, %2, %0" - [(set_attr "type" "fgm_pdist") + [(set_attr "type" "pdistn") (set_attr "fptype" "double")]) (define_insn "fmean16_vis" @@ -8724,7 +8724,7 @@ UNSPEC_FUCMP))] "TARGET_VIS3" "fucmp<code>8\t%1, %2, %0" - [(set_attr "type" "fga")]) + [(set_attr "type" "visl")]) (define_insn "*naddsf3" [(set (match_operand:SF 0 "register_operand" "=f") diff --git a/gcc/config/sparc/ultra1_2.md b/gcc/config/sparc/ultra1_2.md index be26ea8..0ab32e5 100644 --- a/gcc/config/sparc/ultra1_2.md +++ b/gcc/config/sparc/ultra1_2.md @@ -255,7 +255,7 @@ 2 (and (and (eq_attr "cpu" "ultrasparc") - (eq_attr "type" "fga")) + (eq_attr "type" "fga,visl,vismv")) (eq_attr "fptype" "single")) "us1_fpa + us1_fp_single + us1_slotany, nothing") @@ -265,7 +265,7 @@ 2 (and (and (eq_attr "cpu" "ultrasparc") - (eq_attr "type" "fga")) + (eq_attr "type" "fga,visl,vismv")) (eq_attr "fptype" "double")) "us1_fpa + us1_fp_double + us1_slotany, nothing") @@ -294,7 +294,7 @@ (define_insn_reservation "us1_pdist" 4 (and (eq_attr "cpu" "ultrasparc") - (eq_attr "type" "fgm_pdist")) + (eq_attr "type" "pdist")) "us1_fpm + us1_fp_double + us1_slotany, nothing*3") (define_bypass 3 "us1_pdist" "us1_fga_double,us1_fga_single") diff --git a/gcc/config/sparc/ultra3.md b/gcc/config/sparc/ultra3.md index fc36e25..f85734b 100644 --- a/gcc/config/sparc/ultra3.md +++ b/gcc/config/sparc/ultra3.md @@ -176,7 +176,7 @@ (define_insn_reservation "us3_fga" 3 (and (eq_attr "cpu" "ultrasparc3") - (eq_attr "type" "fga")) + (eq_attr "type" 
"fga,visl,vismv")) "us3_fpa + us3_slotany, nothing*2") (define_insn_reservation "us3_fgm" @@ -188,7 +188,7 @@ (define_insn_reservation "us3_pdist" 4 (and (eq_attr "cpu" "ultrasparc3") - (eq_attr "type" "fgm_pdist")) + (eq_attr "type" "pdist")) "us3_fpm + us3_slotany, nothing*3") (define_bypass 1 "us3_pdist" "us3_pdist") -- 1.7.12.2.dirty