Here's an updated patch with Kyrill's and Andrew's comments integrated. I left the file in the config/arm directory, as the XGene family is capable of executing ARMv7 and we will wire this into the 32-bit backend in the near future (moving it now would only mean having to move it again shortly afterwards).
We also moved the 'include' up to where the pipeline models for the A53/A57/ThunderX are included, as the previous dependency on picking up the SIMD types from aarch64-simd.md no longer holds true since gcc-4.9. Cheers, -Philipp. --- gcc/ChangeLog | 6 + gcc/config/aarch64/aarch64.md | 3 +- gcc/config/arm/xgene1.md | 520 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 528 insertions(+), 1 deletion(-) create mode 100644 gcc/config/arm/xgene1.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c9ac0d9..dad2278 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2014-11-19 Philipp Tomsich <[email protected]> + * config/aarch64/aarch64.md: Include xgene1.md. + (generic_sched): Set to no for xgene1. + * config/arm/xgene1.md: New file. + +2014-11-19 Philipp Tomsich <[email protected]> + * config/aarch64/aarch64-cores.def (xgene1): Update/add the xgene1 (APM XGene-1) core definition. * gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1 diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 597ff8c..1b36384 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -191,7 +191,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (eq_attr "tune" "cortexa53,cortexa15,thunderx") + (eq_attr "tune" "cortexa53,cortexa15,thunderx,xgene1") (const_string "no") (const_string "yes")))) @@ -199,6 +199,7 @@ (include "../arm/cortex-a53.md") (include "../arm/cortex-a15.md") (include "thunderx.md") +(include "../arm/xgene1.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md new file mode 100644 index 0000000..227f2c7 --- /dev/null +++ b/gcc/config/arm/xgene1.md @@ -0,0 +1,520 @@ +;; Machine description for AppliedMicro xgene1 core. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by Theobroma Systems Design und Consulting GmbH. 
+;; See http://www.theobroma-systems.com for more info.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Pipeline description for the xgene1 micro-architecture
+
+(define_automaton "xgene1")  ; single automaton holding every unit declared below
+
+(define_cpu_unit "xgene1_decode_out0" "xgene1")  ; four decode output slots, out0..out3
+(define_cpu_unit "xgene1_decode_out1" "xgene1")
+(define_cpu_unit "xgene1_decode_out2" "xgene1")
+(define_cpu_unit "xgene1_decode_out3" "xgene1")
+
+(define_cpu_unit "xgene_divide" "xgene1")  ; NOTE(review): name lacks the "xgene1_" prefix used by the other units
+(define_cpu_unit "xgene_fp_divide" "xgene1")  ; NOTE(review): same naming inconsistency as xgene_divide
+
+(define_reservation "xgene1_decode1op"  ; any one of the four decode slots
+        "( xgene1_decode_out0 )
+        |( xgene1_decode_out1 )
+        |( xgene1_decode_out2 )
+        |( xgene1_decode_out3 )"
+)
+(define_reservation "xgene1_decode2op"  ; any pair of distinct decode slots, taken in the same cycle
+        "( xgene1_decode_out0 + xgene1_decode_out1 )
+        |( xgene1_decode_out0 + xgene1_decode_out2 )
+        |( xgene1_decode_out0 + xgene1_decode_out3 )
+        |( xgene1_decode_out1 + xgene1_decode_out2 )
+        |( xgene1_decode_out1 + xgene1_decode_out3 )
+        |( xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+(define_reservation "xgene1_decodeIsolated"  ; all four decode slots at once
+        "( xgene1_decode_out0 + xgene1_decode_out1 + xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+
+(define_insn_reservation "branch" 1  ; latency 1, one decode slot; NOTE(review): generic name, consider an "xgene1_" prefix
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "branch"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "nop" 1  ; latency 1, one decode slot; matches insns of type "no_insn"
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "no_insn"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "call" 1  ; latency 1, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "call"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "f_load" 10  ; FP loads: latency 10, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_loadd,f_loads"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "f_store" 4  ; FP stores: latency 4, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_stored,f_stores"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "fmov" 2  ; FP moves and FP constants: latency 2, one decode slot
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fmov,fconsts,fconstd"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "f_mcr" 10  ; latency 10, occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mcr"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "f_mrc" 4  ; latency 4, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mrc"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "load_pair" 6  ; type load2: latency 6, occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load2"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "store_pair" 2  ; type store2: latency 2, occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store2"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "load1" 2  ; latency 2, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load1"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "store1" 2  ; latency 2, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store1"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "move" 1  ; register/immediate moves and mrs: latency 1, one decode slot
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mov_reg,mov_imm,mrs"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "alu" 1  ; non-flag-setting ALU ops; mov_reg/mov_imm are also listed in "move" above
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alu_imm,alu_sreg,alu_shift_imm,\
+                        alu_ext,adc_reg,csel,logic_imm,\
+                        logic_reg,logic_shift_imm,clz,\
+                        rbit,shift_reg,adr,mov_reg,\
+                        mov_imm,extend"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "simd" 1  ; currently only type "rev": latency 1, one decode slot
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "rev"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "alus" 1  ; flag-setting ALU ops; uses alus_sreg (alu_sreg is already covered by "alu")
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alus_imm,alus_sreg,alus_shift_imm,\
+                        alus_ext,logics_imm,logics_reg,\
+                        logics_shift_imm"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "mul" 6  ; integer multiply / multiply-accumulate: latency 6, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mul,mla,smull,umull,smlal,umlal"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "div" 2  ; one decode slot, then the xgene_divide unit in the following cycle
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "sdiv,udiv"))
+  "xgene1_decode1op,xgene_divide")
+
+(define_insn_reservation "fcmp" 10  ; latency 10, one decode slot
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcmpd,fcmps"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "fcsel" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcsel"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "bfm" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "bfm"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "f_rint" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_rintd,f_rints"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "f_cvt" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvt"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "f_cvtf2i" 11  ; latency 11, occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvtf2i"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "f_cvti2f" 14  ; latency 14, occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvti2f"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "f_add" 5  ; covers scalar FP add and FP multiply types alike
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "faddd,fadds,fmuld,fmuls"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "f_div" 2  ; one decode slot, then the xgene_fp_divide unit in the following cycle
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fdivd,fdivs"))
+  "xgene1_decode1op,xgene_fp_divide")
+
+(define_insn_reservation "f_arith" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "ffarithd,ffariths"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "f_sqrt" 2  ; shares the xgene_fp_divide unit with "f_div"
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fsqrtd,fsqrts"))
+  "xgene1_decode1op,xgene_fp_divide")
+
+(define_insn_reservation "f_select" 3  ; FP min/max types
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_minmaxd,f_minmaxs"))
+  "xgene1_decode1op")
+
+
+(define_insn_reservation "neon_dup" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_dup,neon_dup_q"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_load1" 11  ; latency 11, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "neon_store1" 5  ; latency 5, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "neon_logic" 2  ; NOTE(review): this and later type lists end with a trailing comma
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_logic,\
+                        neon_logic_q,\
+                        neon_bsl,\
+                        neon_bsl_q,\
+                        neon_move,\
+                        neon_move_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_umov" 7  ; vector-to-GP transfers: occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_to_gp, neon_to_gp_q"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "neon_ins" 14  ; GP-to-vector transfers and inserts: occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_from_gp,\
+                        neon_from_gp_q,\
+                        neon_ins,\
+                        neon_ins_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "neon_shift" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_imm,\
+                        neon_shift_imm_q,\
+                        neon_shift_reg,\
+                        neon_shift_reg_q,\
+                        neon_shift_imm_long,\
+                        neon_sat_shift_imm,\
+                        neon_sat_shift_imm_q,\
+                        neon_sat_shift_imm_narrow_q,\
+                        neon_sat_shift_reg,\
+                        neon_sat_shift_reg_q,\
+                        neon_shift_imm_narrow_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_arith" 3  ; NOTE(review): lists neon_abd_q but not neon_abd — confirm intent
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_add,\
+                        neon_add_q,\
+                        neon_sub,\
+                        neon_sub_q,\
+                        neon_neg,\
+                        neon_neg_q,\
+                        neon_abs,\
+                        neon_abs_q,\
+                        neon_abd_q,\
+                        neon_arith_acc,\
+                        neon_arith_acc_q,\
+                        neon_reduc_add,\
+                        neon_reduc_add_q,\
+                        neon_add_halve,\
+                        neon_add_halve_q,\
+                        neon_sub_halve,\
+                        neon_sub_halve_q,\
+                        neon_qadd,\
+                        neon_qadd_q,\
+                        neon_compare,\
+                        neon_compare_q,\
+                        neon_compare_zero,\
+                        neon_compare_zero_q,\
+                        neon_tst,\
+                        neon_tst_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_abs_diff" 6  ; NOTE(review): both types are also listed in "neon_arith" above — confirm which reservation should own them
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_arith_acc,neon_arith_acc_q"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "neon_mul" 5  ; integer, FP and saturating vector multiply / multiply-accumulate types: two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_b,\
+                        neon_mul_b_q,\
+                        neon_mul_h,\
+                        neon_mul_h_q,\
+                        neon_mul_s,\
+                        neon_mul_s_q,\
+                        neon_fp_mul_s_scalar,\
+                        neon_fp_mul_s_scalar_q,\
+                        neon_fp_mul_d_scalar_q,\
+                        neon_mla_b,neon_mla_b_q,\
+                        neon_mla_h,neon_mla_h_q,\
+                        neon_mla_s,neon_mla_s_q,\
+                        neon_mla_h_scalar,\
+                        neon_mla_h_scalar_q,\
+                        neon_mla_s_scalar,\
+                        neon_mla_s_scalar_q,\
+                        neon_mla_b_long,\
+                        neon_mla_h_long,\
+                        neon_mla_s_long,\
+                        neon_fp_mul_s,\
+                        neon_fp_mul_s_q,\
+                        neon_fp_mul_d,\
+                        neon_fp_mul_d_q,\
+                        neon_fp_mla_s,\
+                        neon_fp_mla_s_q,\
+                        neon_fp_mla_d,\
+                        neon_fp_mla_d_q,\
+                        neon_fp_mla_s_scalar,\
+                        neon_fp_mla_s_scalar_q,\
+                        neon_fp_mla_d_scalar_q,\
+                        neon_sat_mul_b,\
+                        neon_sat_mul_b_q,\
+                        neon_sat_mul_h,\
+                        neon_sat_mul_h_q,\
+                        neon_sat_mul_s,\
+                        neon_sat_mul_s_q,\
+                        neon_sat_mul_h_scalar,\
+                        neon_sat_mul_h_scalar_q,\
+                        neon_sat_mul_s_scalar,\
+                        neon_sat_mul_s_scalar_q,\
+                        neon_sat_mul_h_scalar_long,\
+                        neon_sat_mul_s_scalar_long,\
+                        neon_sat_mla_b_long,\
+                        neon_sat_mla_h_long,\
+                        neon_sat_mla_s_long,\
+                        neon_sat_mla_h_scalar_long,\
+                        neon_sat_mla_s_scalar_long,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "fp_abd_diff" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_abd_s,\
+                        neon_fp_abd_s_q,\
+                        neon_fp_abd_d,\
+                        neon_fp_abd_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_addsub_s,\
+                        neon_fp_addsub_s_q,\
+                        neon_fp_addsub_d,\
+                        neon_fp_addsub_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_f_div" 2  ; one decode slot, then the xgene_fp_divide unit in the following cycle
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_div_s,\
+                        neon_fp_div_s_q,\
+                        neon_fp_div_d,\
+                        neon_fp_div_d_q,\
+                       "))
+  "xgene1_decode1op,xgene_fp_divide")
+
+(define_insn_reservation "neon_f_neg" 2  ; covers both the FP negate and FP abs types
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_neg_s,\
+                        neon_fp_neg_s_q,\
+                        neon_fp_neg_d,\
+                        neon_fp_neg_d_q,\
+                        neon_fp_abs_s,\
+                        neon_fp_abs_s_q,\
+                        neon_fp_abs_d,\
+                        neon_fp_abs_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_f_round" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_round_s,\
+                        neon_fp_round_s_q,\
+                        neon_fp_round_d,\
+                        neon_fp_round_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_f_cvt" 5  ; int-to-FP plus FP widen/narrow conversion types
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_int_to_fp_s,\
+                        neon_int_to_fp_s_q,\
+                        neon_int_to_fp_d,\
+                        neon_int_to_fp_d_q,\
+                        neon_fp_cvt_widen_s,\
+                        neon_fp_cvt_narrow_s_q,\
+                        neon_fp_cvt_narrow_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_f_reduc" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_reduc_add_s,\
+                        neon_fp_reduc_add_s_q,\
+                        neon_fp_reduc_add_d,\
+                        neon_fp_reduc_add_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_cls" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_cls,neon_cls_q"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_st1" 4  ; single-lane stores: latency 4, one decode slot
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_one_lane,\
+                        neon_store1_one_lane_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_halve_narrow" 6  ; occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_sub_halve_narrow_q,\
+                        neon_add_halve_narrow_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "neon_shift_acc" 6  ; latency 6, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_acc,\
+                        neon_shift_acc_q,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "neon_fp_compare" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_compare_s,\
+                        neon_fp_compare_s_q,\
+                        neon_fp_compare_d,\
+                        neon_fp_compare_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_fp_sqrt" 2  ; one decode slot, then the xgene_fp_divide unit in the following cycle
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_sqrt_s,\
+                        neon_fp_sqrt_s_q,\
+                        neon_fp_sqrt_d,\
+                        neon_fp_sqrt_d_q,\
+                       "))
+  "xgene1_decode1op,xgene_fp_divide")
+
+(define_insn_reservation "neon_tbl1" 4  ; latency 4, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl1,\
+                        neon_tbl1_q,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "neon_tbl2" 8  ; latency 8, occupies all four decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl2,\
+                        neon_tbl2_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "neon_permute" 3  ; latency 3, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_permute,\
+                        neon_permute_q,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "neon_ld1r" 10  ; load-to-all-lanes type: latency 10, one decode slot
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_all_lanes,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_fp_recp" 3  ; recpe/recpx types
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recpe_s,\
+                        neon_fp_recpe_s_q,\
+                        neon_fp_recpe_d,\
+                        neon_fp_recpe_d_q,\
+                        neon_fp_recpx_s,\
+                        neon_fp_recpx_s_q,\
+                        neon_fp_recpx_d,\
+                        neon_fp_recpx_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+
+(define_insn_reservation "neon_fp_recp_s" 5  ; recps types, listed separately from recpe/recpx with latency 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recps_s,\
+                        neon_fp_recps_s_q,\
+                        neon_fp_recps_d,\
+                        neon_fp_recps_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "neon_pmull" 5  ; matches type neon_mul_d_long: latency 5, two decode slots
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_d_long,\
+                       "))
+  "xgene1_decode2op")
-- 
1.9.1
