I just created two PRs to add those new options to riscv-toolchain-conventions, so that we can make sure they are aligned with the clang/LLVM community.
https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/49 https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/50 On Wed, May 29, 2024 at 3:20 AM Robin Dapp <rdapp....@gmail.com> wrote: > > Hi, > > this patch disables movmisalign by default and introduces > the -mno-vector-strict-align option to override it and re-enable > movmisalign. For now, generic-ooo is the only uarch that supports > misaligned vector access. > > The patch also adds a check_effective_target_riscv_v_misalign_ok to > the testsuite which enables or disables the vector misalignment tests > depending on whether the target under test can execute a misaligned > vle32. > > Changes from v3: > - Adressed Kito's comments. > - Made -mscalar-strict-align a real alias. > > Regards > Robin > > gcc/ChangeLog: > > * config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED): > Move from here... > * config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED): > ...to here and map to riscv_vector_unaligned_access_p. > * config/riscv/riscv.opt: Add -mvector-strict-align. > * config/riscv/riscv.cc (struct riscv_tune_param): Add > vector_unaligned_access. > (riscv_override_options_internal): Set > riscv_vector_unaligned_access_p. > * doc/invoke.texi: Document -mvector-strict-align. > > gcc/testsuite/ChangeLog: > > * lib/target-supports.exp: Add > check_effective_target_riscv_v_misalign_ok. > * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add > -mno-vector-strict-align. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto. 
> --- > gcc/config/riscv/riscv-opts.h | 3 -- > gcc/config/riscv/riscv.cc | 19 +++++++++++ > gcc/config/riscv/riscv.h | 5 +++ > gcc/config/riscv/riscv.opt | 8 +++++ > gcc/doc/invoke.texi | 22 ++++++++++++ > .../costmodel/riscv/rvv/dynamic-lmul2-7.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-8.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-9.c | 2 +- > .../riscv/rvv/autovec/vls/misalign-1.c | 2 +- > gcc/testsuite/lib/target-supports.exp | 34 +++++++++++++++++-- > 13 files changed, 93 insertions(+), 12 deletions(-) > > diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h > index 1b2dd5757a8..f58a07abffc 100644 > --- a/gcc/config/riscv/riscv-opts.h > +++ b/gcc/config/riscv/riscv-opts.h > @@ -147,9 +147,6 @@ enum rvv_vector_bits_enum { > ? 0 > \ > : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1)) > > -/* TODO: Enable RVV movmisalign by default for now. */ > -#define TARGET_VECTOR_MISALIGN_SUPPORTED 1 > - > /* The maximmum LMUL according to user configuration. */ > #define TARGET_MAX_LMUL > \ > (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul) > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index a99211d56b1..13cd61a4a22 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -287,6 +287,7 @@ struct riscv_tune_param > unsigned short memory_cost; > unsigned short fmv_cost; > bool slow_unaligned_access; > + bool vector_unaligned_access; > bool use_divmod_expansion; > bool overlap_op_by_pieces; > unsigned int fusible_ops; > @@ -299,6 +300,10 @@ struct riscv_tune_param > /* Whether unaligned accesses execute very slowly. */ > bool riscv_slow_unaligned_access_p; > > +/* Whether misaligned vector accesses are supported (i.e. do not > + throw an exception). 
*/ > +bool riscv_vector_unaligned_access_p; > + > /* Whether user explicitly passed -mstrict-align. */ > bool riscv_user_wants_strict_align; > > @@ -441,6 +446,7 @@ static const struct riscv_tune_param rocket_tune_info = { > 5, /* memory_cost */ > 8, /* fmv_cost */ > true, /* > slow_unaligned_access */ > + false, /* vector_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -459,6 +465,7 @@ static const struct riscv_tune_param sifive_7_tune_info = > { > 3, /* memory_cost */ > 8, /* fmv_cost */ > true, /* > slow_unaligned_access */ > + false, /* vector_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -477,6 +484,7 @@ static const struct riscv_tune_param > sifive_p400_tune_info = { > 3, /* memory_cost */ > 4, /* fmv_cost */ > true, /* > slow_unaligned_access */ > + false, /* vector_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ > @@ -495,6 +503,7 @@ static const struct riscv_tune_param > sifive_p600_tune_info = { > 3, /* memory_cost */ > 4, /* fmv_cost */ > true, /* > slow_unaligned_access */ > + false, /* vector_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ > @@ -513,6 +522,7 @@ static const struct riscv_tune_param thead_c906_tune_info > = { > 5, /* memory_cost */ > 8, /* fmv_cost */ > false, /* slow_unaligned_access */ > + false, /* vector_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -531,6 +541,7 @@ static const struct riscv_tune_param > xiangshan_nanhu_tune_info = { > 3, /* memory_cost */ > 3, /* fmv_cost */ > true, /* > slow_unaligned_access */ > + false, /* vector_unaligned_access */ 
> false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */ > @@ -549,6 +560,7 @@ static const struct riscv_tune_param > generic_ooo_tune_info = { > 4, /* memory_cost */ > 4, /* fmv_cost */ > false, /* slow_unaligned_access */ > + true, /* > vector_unaligned_access */ > false, /* use_divmod_expansion */ > true, /* > overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -567,6 +579,7 @@ static const struct riscv_tune_param > optimize_size_tune_info = { > 2, /* memory_cost */ > 8, /* fmv_cost */ > false, /* slow_unaligned_access */ > + false, /* vector_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -9615,6 +9628,12 @@ riscv_override_options_internal (struct gcc_options > *opts) > riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access > || TARGET_STRICT_ALIGN); > > + /* By default, when -mno-vector-strict-align is not specified, do not allow > + unaligned vector memory accesses except if -mtune's setting explicitly > + allows it. */ > + riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0 > + || cpu->tune_param->vector_unaligned_access; > + > /* Make a note if user explicitly passed -mstrict-align for later > builtin macro generation. Can't use target_flags_explicitly since > it is set even for -mno-strict-align. */ > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index d6b14c4d620..57910eecd3e 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -934,6 +934,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use); > || (riscv_microarchitecture == sifive_p400) \ > || (riscv_microarchitecture == sifive_p600)) > > +/* True if the target supports misaligned vector loads and stores. 
*/ > +#define TARGET_VECTOR_MISALIGN_SUPPORTED \ > + riscv_vector_unaligned_access_p > + > #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 > > /* Control the assembler format that we output. */ > @@ -1161,6 +1165,7 @@ while (0) > #ifndef USED_FOR_TARGET > extern const enum reg_class riscv_regno_to_class[]; > extern bool riscv_slow_unaligned_access_p; > +extern bool riscv_vector_unaligned_access_p; > extern bool riscv_user_wants_strict_align; > extern unsigned riscv_stack_boundary; > extern unsigned riscv_bytes_per_vector_chunk; > diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt > index 87f58332016..04f210ee88a 100644 > --- a/gcc/config/riscv/riscv.opt > +++ b/gcc/config/riscv/riscv.opt > @@ -128,6 +128,14 @@ mstrict-align > Target Mask(STRICT_ALIGN) Save > Do not generate unaligned memory accesses. > > +mscalar-strict-align > +Target Save Alias(mstrict-align) > +Do not generate unaligned scalar memory accesses. > + > +mvector-strict-align > +Target Var(rvv_vector_strict_align) Init(1) > +Do not create element-misaligned vector memory accesses. > + > Enum > Name(code_model) Type(enum riscv_code_model) > Known code models (for use with the -mcmodel= option): > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 2cba380718b..ed165787fd2 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -31099,6 +31099,28 @@ Do not or do generate unaligned memory accesses. > The default is set depending > on whether the processor we are optimizing for supports fast unaligned access > or not. > > +@opindex mscalar-strict-align > +@opindex mno-scalar-strict-align > +@item -mscalar-strict-align > +@itemx -mno-scalar-strict-align > +Do not or do generate unaligned memory accesses. The default is set > depending > +on whether the processor we are optimizing for supports fast unaligned access > +or not. This is an alias for @option{-mstrict-align}. 
> + > +@opindex mvector-strict-align > +@opindex mno-vector-strict-align > +@item -mvector-strict-align > +@itemx -mno-vector-strict-align > +Do not or do generate unaligned vector memory accesses. The default is set > +to off unless the processor we are optimizing for explicitly supports > +element-misaligned vector memory access. > + > +@opindex mrvv-allow-misalign > +@item -mrvv-allow-misalign > +Allow the creation of element-misaligned vector loads and stores irrespective > +of the current uarch. The default is off. > + > + > @opindex mcmodel=medlow > @item -mcmodel=medlow > Generate code for the medium-low code model. The program and its statically > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > index 49ea3c2cf72..754f84ae0a0 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize > -mrvv-max-lmul=dynamic" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize > -mrvv-max-lmul=dynamic -mno-vector-strict-align" } */ > > int > x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2) > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > index 144479324d7..d0a0f4208ee 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 > -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 > -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */ > > #include 
<stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > index 13ae8bd3bcf..5a779a9ee75 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 > -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 > -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > index 1f9fa48264e..e7e4e841bb8 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic > -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic > -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > index ea6a7cbe2b1..0e5b4522de5 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mno-vector-strict-align" } > */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > index cb4abeca989..5276e0b2f6c 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 > -mno-vector-strict-align" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > index 1a076cbcd0f..5184a295e16 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns > -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } > */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns > -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns > -mno-vector-strict-align" } */ > > #include <stdlib.h> > > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index f0f6da52275..e887efbb8f3 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -2034,7 +2034,7 @@ proc check_effective_target_riscv_zvfh_ok { } { > # check if we can execute vector insns with the given hardware or > # simulator > set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] > - if { [check_runtime ${gcc_march}_exec { > + if { [check_runtime ${gcc_march}_zvfh_exec { > int main() > { > asm ("vsetivli zero,8,e16,m1,ta,ma"); > @@ -2047,6 +2047,29 @@ proc check_effective_target_riscv_zvfh_ok { } { > return 0 > } > > +# Return 1 if we can load a vector from a 1-byte aligned address. 
> + > +proc check_effective_target_riscv_v_misalign_ok { } { > + > + if { ![check_effective_target_riscv_v_ok] } { > + return 0 > + } > + > + set gcc_march [riscv_get_arch] > + if { [check_runtime ${gcc_march}_misalign_exec { > + int main() { > + unsigned char a[16] > + = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; > + asm ("vsetivli zero,7,e8,m1,ta,ma"); > + asm ("addi a7,%0,1" : : "r" (a) : "a7" ); > + asm ("vle8.v v8,0(a7)" : : : "v8"); > + return 0; } } "-march=${gcc_march}"] } { > + return 1 > + } > + > + return 0 > +} > + > proc riscv_get_arch { } { > set gcc_march "" > # ??? do we neeed to add more extensions to the list below? > @@ -8139,7 +8162,6 @@ proc check_effective_target_vect_hw_misalign { } { > || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) > || ([istarget s390*-*-*] > && [check_effective_target_s390_vx]) > - || ([istarget riscv*-*-*]) > || ([istarget loongarch*-*-*]) > || [istarget amdgcn*-*-*] } { > return 1 > @@ -8148,6 +8170,11 @@ proc check_effective_target_vect_hw_misalign { } { > && ![check_effective_target_arm_vect_no_misalign] } { > return 1 > } > + if { [istarget riscv*-*-*] > + && [check_effective_target_riscv_v_misalign_ok] } { > + return 1 > + } > + > return 0 > }] > } > @@ -11565,6 +11592,9 @@ proc check_vect_support_and_set_flags { } { > } elseif [istarget riscv*-*-*] { > if [check_effective_target_riscv_v] { > set dg-do-what-default run > + if [check_effective_target_riscv_v_misalign_ok] { > + lappend DEFAULT_VECTCFLAGS "-mno-vector-strict-align" > + } > } else { > foreach item [add_options_for_riscv_v ""] { > lappend DEFAULT_VECTCFLAGS $item > -- > 2.45.0