https://gcc.gnu.org/g:62ffaef514aad0eae5d3bc548d8e86d21a3095e7
commit r16-429-g62ffaef514aad0eae5d3bc548d8e86d21a3095e7 Author: Jan Hubicka <hubi...@ucw.cz> Date: Wed May 7 00:24:04 2025 +0200 i386: Add costs for integer<->float conversions Extend ix86_rtx_costs to cost FLOAT, UNSIGNED_FLOAT, FIX, and UNSIGNED_FIX. There are many variants of integer<->float conversions and it seems meaningful to start with the typical scalar and vector ones. On modern CPUs the variants differ by at most 1 cycle. gcc/ChangeLog: * config/i386/i386.cc (ix86_rtx_costs): Cost FLOAT, UNSIGNED_FLOAT, FIX, UNSIGNED_FIX. * config/i386/i386.h (struct processor_costs): Add cvtsi2ss, cvtss2si, cvtpi2ps, cvtps2pi. * config/i386/x86-tune-costs.h (struct processor_costs): Update tables. Diff: --- gcc/config/i386/i386.cc | 21 +++++++ gcc/config/i386/i386.h | 4 ++ gcc/config/i386/x86-tune-costs.h | 133 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 89f518c86b5e..bef95ea18c87 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22794,6 +22794,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; + case FLOAT: + case UNSIGNED_FLOAT: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtpi2ps); + else + *total = cost->cvtsi2ss; + return false; + + case FIX: + case UNSIGNED_FIX: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtps2pi); + else + *total = cost->cvtss2si; + return false; case ABS: /* SSE requires memory load for the constant operand. 
It may make diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 02bf357d7762..6a38de30de43 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -213,6 +213,10 @@ struct processor_costs { such as VCVTPD2PS with larger reg in ymm. */ const int vcvtps2pd512; /* cost 512bit packed FP conversions, such as VCVTPD2PS with larger reg in zmm. */ + const int cvtsi2ss; /* cost of CVTSI2SS instruction. */ + const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */ + const int cvtpi2ps; /* cost of CVTPI2PS instruction. */ + const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index cddcf6173042..6cce70a6c403 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -134,6 +134,11 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */ + 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -249,6 +254,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. 
*/ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -365,6 +374,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -479,6 +492,10 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -586,6 +603,10 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -708,6 +729,10 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. 
*/ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -821,6 +846,10 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -937,6 +966,10 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1054,6 +1087,10 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ athlon_memcpy, athlon_memset, @@ -1180,6 +1217,10 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1314,6 +1355,10 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1441,6 +1486,10 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1593,6 +1642,10 @@ struct processor_costs znver1_cost = { /* Real latency is 4, but for split regs multiply cost of half op by 2. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. 
*/ + COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1755,6 +1808,10 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1893,6 +1950,10 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2034,6 +2095,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ /* Real latency is 6, but for split regs multiply cost of half op by 2. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. 
*/ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2188,6 +2253,10 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2330,6 +2399,10 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2462,6 +2535,10 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. 
*/ icelake_memcpy, icelake_memset, @@ -2588,6 +2665,10 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2707,6 +2788,10 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2823,6 +2908,10 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2938,6 +3027,10 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. 
*/ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -3056,6 +3149,10 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3172,6 +3269,10 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3288,6 +3389,10 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3418,6 +3523,10 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. 
*/ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3534,6 +3643,10 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3655,6 +3768,10 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3774,6 +3891,10 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. 
*/ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3893,6 +4014,10 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -4020,6 +4145,10 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4152,6 +4281,10 @@ struct processor_costs core_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset,