On Mon, Aug 25, 2025 at 12:04 PM <[email protected]> wrote:
>
> From: Soumya AR <[email protected]>
>
> To allow runtime updates to tuning parameters, the const keyword is removed
> from the members of the tune_params structure and the members of its nested
> structures.
>
> Since this patch also touches tuning structures in the arm backend, it was
> bootstrapped on aarch64-linux-gnu as well as arm-linux-gnueabihf.
>
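
For anyone skimming the thread, a minimal sketch of what dropping const buys
(this is not part of the patch; the trimmed struct uses two fields of the
patch's cpu_prefetch_tune, and the instance name, helper and values below are
purely illustrative): with const members the assignment in the helper is
rejected, so tuning values are frozen at the point of definition; with plain
members they can be adjusted at runtime, e.g. from a target override hook.

  /* Sketch only: two fields of cpu_prefetch_tune, with const dropped.  */
  struct cpu_prefetch_tune
  {
    int num_slots;		/* was: const int num_slots;  */
    int l1_cache_line_size;	/* was: const int l1_cache_line_size;  */
  };

  /* Illustrative instance; the values are made up.  */
  static cpu_prefetch_tune example_prefetch_tune = { 4, 64 };

  /* Ill-formed while the member was const; fine once it is a plain int.  */
  void
  set_l1_line_size (int size)
  {
    example_prefetch_tune.l1_cache_line_size = size;
  }
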
The Arm backend patches are ok if the AArch64 patches are reviewed and accepted.

Regards
Ramana

> Signed-off-by: Soumya AR <[email protected]>
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-protos.h
> 	(struct scale_addr_mode_cost): Remove const from struct members.
> 	(struct cpu_addrcost_table): Likewise.
> 	(struct cpu_regmove_cost): Likewise.
> 	(struct simd_vec_cost): Likewise.
> 	(struct sve_vec_cost): Likewise.
> 	(struct aarch64_base_vec_issue_info): Likewise.
> 	(struct aarch64_simd_vec_issue_info): Likewise.
> 	(struct aarch64_sve_vec_issue_info): Likewise.
> 	(struct aarch64_vec_issue_info): Likewise.
> 	(struct cpu_vector_cost): Likewise.
> 	(struct cpu_branch_cost): Likewise.
> 	(struct cpu_approx_modes): Likewise.
> 	(struct cpu_prefetch_tune): Likewise.
> 	* config/arm/aarch-common-protos.h
> 	(struct alu_cost_table): Remove const from struct members.
> 	(struct mult_cost_table): Likewise.
> 	(struct mem_cost_table): Likewise.
> 	(struct fp_cost_table): Likewise.
> 	(struct vector_cost_table): Likewise.
> 	(struct cpu_cost_table): Likewise.
> ---
>  gcc/config/aarch64/aarch64-protos.h  | 164 +++++++++++++--------------
>  gcc/config/arm/aarch-common-protos.h | 128 ++++++++++-----------
>  2 files changed, 146 insertions(+), 146 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 56efcf2c7f2..962ba612349 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -166,88 +166,88 @@ enum aarch64_salt_type {
>
>  struct scale_addr_mode_cost
>  {
> -  const int hi;
> -  const int si;
> -  const int di;
> -  const int ti;
> +  int hi;
> +  int si;
> +  int di;
> +  int ti;
>  };
>
>  /* Additional cost for addresses.  */
>  struct cpu_addrcost_table
>  {
> -  const struct scale_addr_mode_cost addr_scale_costs;
> -  const int pre_modify;
> -  const int post_modify;
> -  const int post_modify_ld3_st3;
> -  const int post_modify_ld4_st4;
> -  const int register_offset;
> -  const int register_sextend;
> -  const int register_zextend;
> -  const int imm_offset;
> +  struct scale_addr_mode_cost addr_scale_costs;
> +  int pre_modify;
> +  int post_modify;
> +  int post_modify_ld3_st3;
> +  int post_modify_ld4_st4;
> +  int register_offset;
> +  int register_sextend;
> +  int register_zextend;
> +  int imm_offset;
>  };
>
>  /* Additional costs for register copies.  Cost is for one register.  */
>  struct cpu_regmove_cost
>  {
> -  const int GP2GP;
> -  const int GP2FP;
> -  const int FP2GP;
> -  const int FP2FP;
> +  int GP2GP;
> +  int GP2FP;
> +  int FP2GP;
> +  int FP2FP;
>  };
>
>  struct simd_vec_cost
>  {
>    /* Cost of any integer vector operation, excluding the ones handled
>       specially below.  */
> -  const int int_stmt_cost;
> +  int int_stmt_cost;
>
>    /* Cost of any fp vector operation, excluding the ones handled
>       specially below.  */
> -  const int fp_stmt_cost;
> +  int fp_stmt_cost;
>
>    /* Per-vector cost of permuting vectors after an LD2, LD3 or LD4,
>       as well as the per-vector cost of permuting vectors before
>       an ST2, ST3 or ST4.  */
> -  const int ld2_st2_permute_cost;
> -  const int ld3_st3_permute_cost;
> -  const int ld4_st4_permute_cost;
> +  int ld2_st2_permute_cost;
> +  int ld3_st3_permute_cost;
> +  int ld4_st4_permute_cost;
>
>    /* Cost of a permute operation.  */
> -  const int permute_cost;
> +  int permute_cost;
>
>    /* Cost of reductions for various vector types: iN is for N-bit
>       integer elements and fN is for N-bit floating-point elements.
>       We need to single out the element type because it affects the
>       depth of the reduction.  */
> -  const int reduc_i8_cost;
> -  const int reduc_i16_cost;
> -  const int reduc_i32_cost;
> -  const int reduc_i64_cost;
> -  const int reduc_f16_cost;
> -  const int reduc_f32_cost;
> -  const int reduc_f64_cost;
> +  int reduc_i8_cost;
> +  int reduc_i16_cost;
> +  int reduc_i32_cost;
> +  int reduc_i64_cost;
> +  int reduc_f16_cost;
> +  int reduc_f32_cost;
> +  int reduc_f64_cost;
>
>    /* Additional cost of storing a single vector element, on top of the
>       normal cost of a scalar store.  */
> -  const int store_elt_extra_cost;
> +  int store_elt_extra_cost;
>
>    /* Cost of a vector-to-scalar operation.  */
> -  const int vec_to_scalar_cost;
> +  int vec_to_scalar_cost;
>
>    /* Cost of a scalar-to-vector operation.  */
> -  const int scalar_to_vec_cost;
> +  int scalar_to_vec_cost;
>
>    /* Cost of an aligned vector load.  */
> -  const int align_load_cost;
> +  int align_load_cost;
>
>    /* Cost of an unaligned vector load.  */
> -  const int unalign_load_cost;
> +  int unalign_load_cost;
>
>    /* Cost of an unaligned vector store.  */
> -  const int unalign_store_cost;
> +  int unalign_store_cost;
>
>    /* Cost of a vector store.  */
> -  const int store_cost;
> +  int store_cost;
>  };
>
>  typedef struct simd_vec_cost advsimd_vec_cost;
> @@ -280,27 +280,27 @@ struct sve_vec_cost : simd_vec_cost
>    /* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
>       with the scalar being stored in FP registers.  This cost is
>       assumed to be a cycle latency.  */
> -  const int clast_cost;
> +  int clast_cost;
>
>    /* The costs of FADDA for the three data types that it supports.
>       These costs are assumed to be cycle latencies.  */
> -  const int fadda_f16_cost;
> -  const int fadda_f32_cost;
> -  const int fadda_f64_cost;
> +  int fadda_f16_cost;
> +  int fadda_f32_cost;
> +  int fadda_f64_cost;
>
>    /* The cost of a gather load instruction.  The x32 value is for loads
>       of 32-bit elements and the x64 value is for loads of 64-bit elements.  */
> -  const unsigned int gather_load_x32_cost;
> -  const unsigned int gather_load_x64_cost;
> +  unsigned int gather_load_x32_cost;
> +  unsigned int gather_load_x64_cost;
>
>    /* Additional loop initialization cost of using a gather load instruction.  The x32
>       value is for loads of 32-bit elements and the x64 value is for loads of
>       64-bit elements.  */
> -  const int gather_load_x32_init_cost;
> -  const int gather_load_x64_init_cost;
> +  int gather_load_x32_init_cost;
> +  int gather_load_x64_init_cost;
>
>    /* The per-element cost of a scatter store.  */
> -  const int scatter_store_elt_cost;
> +  int scatter_store_elt_cost;
>  };
>
>  /* Base information about how the CPU issues code, containing
> @@ -319,10 +319,10 @@ struct sve_vec_cost : simd_vec_cost
>  struct aarch64_base_vec_issue_info
>  {
>    /* How many loads and stores can be issued per cycle.  */
> -  const unsigned int loads_stores_per_cycle;
> +  unsigned int loads_stores_per_cycle;
>
>    /* How many stores can be issued per cycle.  */
> -  const unsigned int stores_per_cycle;
> +  unsigned int stores_per_cycle;
>
>    /* How many integer or FP/SIMD operations can be issued per cycle.
>
> @@ -338,7 +338,7 @@ aarch64_base_vec_issue_info
>       This is not very precise, but it's only meant to be a heuristic.
>       We could certainly try to do better in future if there's an example
>       of something that would benefit.  */
> -  const unsigned int general_ops_per_cycle;
> +  unsigned int general_ops_per_cycle;
>
>    /* How many FP/SIMD operations to count for a floating-point or
>       vector load operation.
> @@ -347,7 +347,7 @@ aarch64_base_vec_issue_info
>       been loaded from memory, these values apply to each individual load.
>       When using an SVE gather load, the values apply to each element of
>       the gather.  */
> -  const unsigned int fp_simd_load_general_ops;
> +  unsigned int fp_simd_load_general_ops;
>
>    /* How many FP/SIMD operations to count for a floating-point or
>       vector store operation.
> @@ -355,7 +355,7 @@ aarch64_base_vec_issue_info
>       When storing individual elements of an Advanced SIMD vector out to
>       memory, these values apply to each individual store.  When using an
>       SVE scatter store, these values apply to each element of the scatter.  */
> -  const unsigned int fp_simd_store_general_ops;
> +  unsigned int fp_simd_store_general_ops;
>  };
>
>  using aarch64_scalar_vec_issue_info = aarch64_base_vec_issue_info;
> @@ -382,9 +382,9 @@ struct aarch64_simd_vec_issue_info : aarch64_base_vec_issue_info
>
>        load ops: 3
>        general ops: 3 * (fp_simd_load_general_ops + ld3_st3_general_ops).  */
> -  const unsigned int ld2_st2_general_ops;
> -  const unsigned int ld3_st3_general_ops;
> -  const unsigned int ld4_st4_general_ops;
> +  unsigned int ld2_st2_general_ops;
> +  unsigned int ld3_st3_general_ops;
> +  unsigned int ld4_st4_general_ops;
>  };
>
>  using aarch64_advsimd_vec_issue_info = aarch64_simd_vec_issue_info;
> @@ -411,19 +411,19 @@ struct aarch64_sve_vec_issue_info : aarch64_simd_vec_issue_info
>    {}
>
>    /* How many predicate operations can be issued per cycle.  */
> -  const unsigned int pred_ops_per_cycle;
> +  unsigned int pred_ops_per_cycle;
>
>    /* How many predicate operations are generated by a WHILExx
>       instruction.  */
> -  const unsigned int while_pred_ops;
> +  unsigned int while_pred_ops;
>
>    /* How many predicate operations are generated by an integer
>       comparison instruction.  */
> -  const unsigned int int_cmp_pred_ops;
> +  unsigned int int_cmp_pred_ops;
>
>    /* How many predicate operations are generated by a floating-point
>       comparison instruction.  */
> -  const unsigned int fp_cmp_pred_ops;
> +  unsigned int fp_cmp_pred_ops;
>
>    /* How many general and predicate operations are generated by each pair
>       of elements in a gather load or scatter store.  These values apply
> @@ -433,38 +433,38 @@ struct aarch64_sve_vec_issue_info : aarch64_simd_vec_issue_info
>       The reason for using pairs is that that is the largest possible
>       granule size for 128-bit SVE, which can load and store 2 64-bit
>       elements or 4 32-bit elements.  */
> -  const unsigned int gather_scatter_pair_general_ops;
> -  const unsigned int gather_scatter_pair_pred_ops;
> +  unsigned int gather_scatter_pair_general_ops;
> +  unsigned int gather_scatter_pair_pred_ops;
>  };
>
>  /* Information related to instruction issue for a particular CPU.  */
>  struct aarch64_vec_issue_info
>  {
> -  const aarch64_base_vec_issue_info *const scalar;
> -  const aarch64_simd_vec_issue_info *const advsimd;
> -  const aarch64_sve_vec_issue_info *const sve;
> +  const aarch64_base_vec_issue_info *scalar;
> +  const aarch64_simd_vec_issue_info *advsimd;
> +  const aarch64_sve_vec_issue_info *sve;
>  };
>
>  /* Cost for vector insn classes.  */
>  struct cpu_vector_cost
>  {
>    /* Cost of any integer scalar operation, excluding load and store.  */
> -  const int scalar_int_stmt_cost;
> +  int scalar_int_stmt_cost;
>
>    /* Cost of any fp scalar operation, excluding load and store.  */
> -  const int scalar_fp_stmt_cost;
> +  int scalar_fp_stmt_cost;
>
>    /* Cost of a scalar load.  */
> -  const int scalar_load_cost;
> +  int scalar_load_cost;
>
>    /* Cost of a scalar store.  */
> -  const int scalar_store_cost;
> +  int scalar_store_cost;
>
>    /* Cost of a taken branch.  */
> -  const int cond_taken_branch_cost;
> +  int cond_taken_branch_cost;
>
>    /* Cost of a not-taken branch.  */
> -  const int cond_not_taken_branch_cost;
> +  int cond_not_taken_branch_cost;
>
>    /* Cost of an Advanced SIMD operations.  */
>    const advsimd_vec_cost *advsimd;
> @@ -473,14 +473,14 @@ struct cpu_vector_cost
>    const sve_vec_cost *sve;
>
>    /* Issue information, or null if none is provided.  */
> -  const aarch64_vec_issue_info *const issue_info;
> +  const aarch64_vec_issue_info *issue_info;
>  };
>
>  /* Branch costs.  */
>  struct cpu_branch_cost
>  {
> -  const int predictable;    /* Predictable branch or optimizing for size.  */
> -  const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
> +  int predictable;    /* Predictable branch or optimizing for size.  */
> +  int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
>  };
>
>  /* Control approximate alternatives to certain FP operators.  */
> @@ -497,25 +497,25 @@
>  /* Allowed modes for approximations.  */
>  struct cpu_approx_modes
>  {
> -  const uint64_t division;	/* Division.  */
> -  const uint64_t sqrt;		/* Square root.  */
> -  const uint64_t recip_sqrt;	/* Reciprocal square root.  */
> +  uint64_t division;		/* Division.  */
> +  uint64_t sqrt;		/* Square root.  */
> +  uint64_t recip_sqrt;		/* Reciprocal square root.  */
>  };
>
>  /* Cache prefetch settings for prefetch-loop-arrays.  */
>  struct cpu_prefetch_tune
>  {
> -  const int num_slots;
> -  const int l1_cache_size;
> -  const int l1_cache_line_size;
> -  const int l2_cache_size;
> +  int num_slots;
> +  int l1_cache_size;
> +  int l1_cache_line_size;
> +  int l2_cache_size;
>    /* Whether software prefetch hints should be issued for non-constant
>       strides.  */
> -  const bool prefetch_dynamic_strides;
> +  bool prefetch_dynamic_strides;
>    /* The minimum constant stride beyond which we should use prefetch
>       hints for.  */
> -  const int minimum_stride;
> -  const int default_opt_level;
> +  int minimum_stride;
> +  int default_opt_level;
>  };
>
>  /* Model the costs for loads/stores for the register allocators so that it can
> diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
> index 077387b9f90..1bafdbaa72f 100644
> --- a/gcc/config/arm/aarch-common-protos.h
> +++ b/gcc/config/arm/aarch-common-protos.h
> @@ -57,33 +57,33 @@ extern bool aarch_fun_is_indirect_return (rtx_insn *);
>     Costs may not have a negative value.  */
>  struct alu_cost_table
>  {
> -  const int arith;		/* ADD/SUB.  */
> -  const int logical;		/* AND/ORR/EOR/BIC, etc.  */
> -  const int shift;		/* Simple shift.  */
> -  const int shift_reg;		/* Simple shift by reg.  */
> -  const int arith_shift;	/* Additional when arith also shifts...  */
> -  const int arith_shift_reg;	/* ... and when the shift is by a reg.  */
> -  const int log_shift;		/* Additional when logic also shifts...  */
> -  const int log_shift_reg;	/* ... and when the shift is by a reg.  */
> -  const int extend;		/* Zero/sign extension.  */
> -  const int extend_arith;	/* Extend and arith.  */
> -  const int bfi;		/* Bit-field insert.  */
> -  const int bfx;		/* Bit-field extraction.  */
> -  const int clz;		/* Count Leading Zeros.  */
> -  const int rev;		/* Reverse bits/bytes.  */
> -  const int non_exec;		/* Extra cost when not executing insn.  */
> -  const bool non_exec_costs_exec; /* True if non-execution must add the exec
> +  int arith;			/* ADD/SUB.  */
> +  int logical;			/* AND/ORR/EOR/BIC, etc.  */
> +  int shift;			/* Simple shift.  */
> +  int shift_reg;		/* Simple shift by reg.  */
> +  int arith_shift;		/* Additional when arith also shifts...  */
> +  int arith_shift_reg;		/* ... and when the shift is by a reg.  */
> +  int log_shift;		/* Additional when logic also shifts...  */
> +  int log_shift_reg;		/* ... and when the shift is by a reg.  */
> +  int extend;			/* Zero/sign extension.  */
> +  int extend_arith;		/* Extend and arith.  */
> +  int bfi;			/* Bit-field insert.  */
> +  int bfx;			/* Bit-field extraction.  */
> +  int clz;			/* Count Leading Zeros.  */
> +  int rev;			/* Reverse bits/bytes.  */
> +  int non_exec;			/* Extra cost when not executing insn.  */
> +  bool non_exec_costs_exec;	/* True if non-execution must add the exec
>  				     cost.  */
>  };
>
>  struct mult_cost_table
>  {
> -  const int simple;
> -  const int flag_setting;	/* Additional cost if multiply sets flags.  */
> -  const int extend;
> -  const int add;
> -  const int extend_add;
> -  const int idiv;
> +  int simple;
> +  int flag_setting;		/* Additional cost if multiply sets flags.  */
> +  int extend;
> +  int add;
> +  int extend_add;
> +  int idiv;
>  };
>
>  /* Calculations of LDM costs are complex.  We assume an initial cost
> @@ -98,60 +98,60 @@ struct mult_cost_table
>   */
>  struct mem_cost_table
>  {
> -  const int load;
> -  const int load_sign_extend;	/* Additional to load cost.  */
> -  const int ldrd;		/* Cost of LDRD.  */
> -  const int ldm_1st;
> -  const int ldm_regs_per_insn_1st;
> -  const int ldm_regs_per_insn_subsequent;
> -  const int loadf;		/* SFmode.  */
> -  const int loadd;		/* DFmode.  */
> -  const int load_unaligned;	/* Extra for unaligned loads.  */
> -  const int store;
> -  const int strd;
> -  const int stm_1st;
> -  const int stm_regs_per_insn_1st;
> -  const int stm_regs_per_insn_subsequent;
> -  const int storef;		/* SFmode.  */
> -  const int stored;		/* DFmode.  */
> -  const int store_unaligned;	/* Extra for unaligned stores.  */
> -  const int loadv;		/* Vector load.  */
> -  const int storev;		/* Vector store.  */
> +  int load;
> +  int load_sign_extend;		/* Additional to load cost.  */
> +  int ldrd;			/* Cost of LDRD.  */
> +  int ldm_1st;
> +  int ldm_regs_per_insn_1st;
> +  int ldm_regs_per_insn_subsequent;
> +  int loadf;			/* SFmode.  */
> +  int loadd;			/* DFmode.  */
> +  int load_unaligned;		/* Extra for unaligned loads.  */
> +  int store;
> +  int strd;
> +  int stm_1st;
> +  int stm_regs_per_insn_1st;
> +  int stm_regs_per_insn_subsequent;
> +  int storef;			/* SFmode.  */
> +  int stored;			/* DFmode.  */
> +  int store_unaligned;		/* Extra for unaligned stores.  */
> +  int loadv;			/* Vector load.  */
> +  int storev;			/* Vector store.  */
>  };
>
>  struct fp_cost_table
>  {
> -  const int div;
> -  const int mult;
> -  const int mult_addsub;	/* Non-fused.  */
> -  const int fma;		/* Fused.  */
> -  const int addsub;
> -  const int fpconst;		/* Immediate.  */
> -  const int neg;		/* NEG and ABS.  */
> -  const int compare;
> -  const int widen;		/* Widen to this size.  */
> -  const int narrow;		/* Narrow from this size.  */
> -  const int toint;
> -  const int fromint;
> -  const int roundint;		/* V8 round to integral, remains FP format.  */
> +  int div;
> +  int mult;
> +  int mult_addsub;		/* Non-fused.  */
> +  int fma;			/* Fused.  */
> +  int addsub;
> +  int fpconst;			/* Immediate.  */
> +  int neg;			/* NEG and ABS.  */
> +  int compare;
> +  int widen;			/* Widen to this size.  */
> +  int narrow;			/* Narrow from this size.  */
> +  int toint;
> +  int fromint;
> +  int roundint;			/* V8 round to integral, remains FP format.  */
>  };
>
>  struct vector_cost_table
>  {
> -  const int alu;
> -  const int mult;
> -  const int movi;
> -  const int dup;
> -  const int extract;
> +  int alu;
> +  int mult;
> +  int movi;
> +  int dup;
> +  int extract;
>  };
>
>  struct cpu_cost_table
>  {
> -  const struct alu_cost_table alu;
> -  const struct mult_cost_table mult[2];	/* SImode and DImode.  */
> -  const struct mem_cost_table ldst;
> -  const struct fp_cost_table fp[2];	/* SFmode and DFmode.  */
> -  const struct vector_cost_table vect;
> +  struct alu_cost_table alu;
> +  struct mult_cost_table mult[2];	/* SImode and DImode.  */
> +  struct mem_cost_table ldst;
> +  struct fp_cost_table fp[2];		/* SFmode and DFmode.  */
> +  struct vector_cost_table vect;
>  };
>
>  rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
> --
> 2.44.0
>
