I just created two PRs to add those new options to
riscv-toolchain-conventions, so that we can make sure they are aligned
with the clang/LLVM community.

https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/49
https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/50

On Wed, May 29, 2024 at 3:20 AM Robin Dapp <rdapp....@gmail.com> wrote:
>
> Hi,
>
> this patch disables movmisalign by default and introduces
> the -mno-vector-strict-align option to override it and re-enable
> movmisalign.  For now, generic-ooo is the only uarch that supports
> misaligned vector access.
>
> The patch also adds a check_effective_target_riscv_v_misalign_ok to
> the testsuite which enables or disables the vector misalignment tests
> depending on whether the target under test can execute a misaligned
> vle32.
>
> Changes from v3:
>  - Addressed Kito's comments.
>  - Made -mscalar-strict-align a real alias.
>
> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
>         Move from here...
>         * config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
>         ...to here and map to riscv_vector_unaligned_access_p.
>         * config/riscv/riscv.opt: Add -mvector-strict-align.
>         * config/riscv/riscv.cc (struct riscv_tune_param): Add
>         vector_unaligned_access.
>         (riscv_override_options_internal): Set
>         riscv_vector_unaligned_access_p.
>         * doc/invoke.texi: Document -mvector-strict-align.
>
> gcc/testsuite/ChangeLog:
>
>         * lib/target-supports.exp: Add
>         check_effective_target_riscv_v_misalign_ok.
>         * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add
>         -mno-vector-strict-align.
>         * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto.
>         * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
>         * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
>         * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto.
>         * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto.
> ---
>  gcc/config/riscv/riscv-opts.h                 |  3 --
>  gcc/config/riscv/riscv.cc                     | 19 +++++++++++
>  gcc/config/riscv/riscv.h                      |  5 +++
>  gcc/config/riscv/riscv.opt                    |  8 +++++
>  gcc/doc/invoke.texi                           | 22 ++++++++++++
>  .../costmodel/riscv/rvv/dynamic-lmul2-7.c     |  2 +-
>  .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c  |  2 +-
>  .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c  |  2 +-
>  .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c  |  2 +-
>  .../vect/costmodel/riscv/rvv/vla_vs_vls-8.c   |  2 +-
>  .../vect/costmodel/riscv/rvv/vla_vs_vls-9.c   |  2 +-
>  .../riscv/rvv/autovec/vls/misalign-1.c        |  2 +-
>  gcc/testsuite/lib/target-supports.exp         | 34 +++++++++++++++++--
>  13 files changed, 93 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index 1b2dd5757a8..f58a07abffc 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -147,9 +147,6 @@ enum rvv_vector_bits_enum {
>       ? 0                                                                     
>   \
>       : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
>
> -/* TODO: Enable RVV movmisalign by default for now.  */
> -#define TARGET_VECTOR_MISALIGN_SUPPORTED 1
> -
>  /* The maximmum LMUL according to user configuration.  */
>  #define TARGET_MAX_LMUL                                                      
>   \
>    (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index a99211d56b1..13cd61a4a22 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -287,6 +287,7 @@ struct riscv_tune_param
>    unsigned short memory_cost;
>    unsigned short fmv_cost;
>    bool slow_unaligned_access;
> +  bool vector_unaligned_access;
>    bool use_divmod_expansion;
>    bool overlap_op_by_pieces;
>    unsigned int fusible_ops;
> @@ -299,6 +300,10 @@ struct riscv_tune_param
>  /* Whether unaligned accesses execute very slowly.  */
>  bool riscv_slow_unaligned_access_p;
>
> +/* Whether misaligned vector accesses are supported (i.e. do not
> +   throw an exception).  */
> +bool riscv_vector_unaligned_access_p;
> +
>  /* Whether user explicitly passed -mstrict-align.  */
>  bool riscv_user_wants_strict_align;
>
> @@ -441,6 +446,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>    5,                                           /* memory_cost */
>    8,                                           /* fmv_cost */
>    true,                                                /* 
> slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
> @@ -459,6 +465,7 @@ static const struct riscv_tune_param sifive_7_tune_info = 
> {
>    3,                                           /* memory_cost */
>    8,                                           /* fmv_cost */
>    true,                                                /* 
> slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
> @@ -477,6 +484,7 @@ static const struct riscv_tune_param 
> sifive_p400_tune_info = {
>    3,                                           /* memory_cost */
>    4,                                           /* fmv_cost */
>    true,                                                /* 
> slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
> @@ -495,6 +503,7 @@ static const struct riscv_tune_param 
> sifive_p600_tune_info = {
>    3,                                           /* memory_cost */
>    4,                                           /* fmv_cost */
>    true,                                                /* 
> slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
> @@ -513,6 +522,7 @@ static const struct riscv_tune_param thead_c906_tune_info 
> = {
>    5,            /* memory_cost */
>    8,           /* fmv_cost */
>    false,            /* slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
> @@ -531,6 +541,7 @@ static const struct riscv_tune_param 
> xiangshan_nanhu_tune_info = {
>    3,                                           /* memory_cost */
>    3,                                           /* fmv_cost */
>    true,                                                /* 
> slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
> @@ -549,6 +560,7 @@ static const struct riscv_tune_param 
> generic_ooo_tune_info = {
>    4,                                           /* memory_cost */
>    4,                                           /* fmv_cost */
>    false,                                       /* slow_unaligned_access */
> +  true,                                                /* 
> vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    true,                                                /* 
> overlap_op_by_pieces */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
> @@ -567,6 +579,7 @@ static const struct riscv_tune_param 
> optimize_size_tune_info = {
>    2,                                           /* memory_cost */
>    8,                                           /* fmv_cost */
>    false,                                       /* slow_unaligned_access */
> +  false,                                       /* vector_unaligned_access */
>    false,                                       /* use_divmod_expansion */
>    false,                                       /* overlap_op_by_pieces */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
> @@ -9615,6 +9628,12 @@ riscv_override_options_internal (struct gcc_options 
> *opts)
>    riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
>                                    || TARGET_STRICT_ALIGN);
>
> +  /* By default, when -mno-vector-strict-align is not specified, do not allow
> +     unaligned vector memory accesses except if -mtune's setting explicitly
> +     allows it.  */
> +  riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
> +    || cpu->tune_param->vector_unaligned_access;
> +
>    /* Make a note if user explicitly passed -mstrict-align for later
>       builtin macro generation.  Can't use target_flags_explicitly since
>       it is set even for -mno-strict-align.  */
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index d6b14c4d620..57910eecd3e 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -934,6 +934,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
>    || (riscv_microarchitecture == sifive_p400) \
>    || (riscv_microarchitecture == sifive_p600))
>
> +/* True if the target supports misaligned vector loads and stores.  */
> +#define TARGET_VECTOR_MISALIGN_SUPPORTED \
> +   riscv_vector_unaligned_access_p
> +
>  #define LOGICAL_OP_NON_SHORT_CIRCUIT 0
>
>  /* Control the assembler format that we output.  */
> @@ -1161,6 +1165,7 @@ while (0)
>  #ifndef USED_FOR_TARGET
>  extern const enum reg_class riscv_regno_to_class[];
>  extern bool riscv_slow_unaligned_access_p;
> +extern bool riscv_vector_unaligned_access_p;
>  extern bool riscv_user_wants_strict_align;
>  extern unsigned riscv_stack_boundary;
>  extern unsigned riscv_bytes_per_vector_chunk;
> diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
> index 87f58332016..04f210ee88a 100644
> --- a/gcc/config/riscv/riscv.opt
> +++ b/gcc/config/riscv/riscv.opt
> @@ -128,6 +128,14 @@ mstrict-align
>  Target Mask(STRICT_ALIGN) Save
>  Do not generate unaligned memory accesses.
>
> +mscalar-strict-align
> +Target Save Alias(mstrict-align)
> +Do not generate unaligned scalar memory accesses.
> +
> +mvector-strict-align
> +Target Var(rvv_vector_strict_align) Init(1)
> +Do not create element-misaligned vector memory accesses.
> +
>  Enum
>  Name(code_model) Type(enum riscv_code_model)
>  Known code models (for use with the -mcmodel= option):
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 2cba380718b..ed165787fd2 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -31099,6 +31099,28 @@ Do not or do generate unaligned memory accesses.  
> The default is set depending
>  on whether the processor we are optimizing for supports fast unaligned access
>  or not.
>
> +@opindex mscalar-strict-align
> +@opindex mno-scalar-strict-align
> +@item -mscalar-strict-align
> +@itemx -mno-scalar-strict-align
> +Do not or do generate unaligned memory accesses.  The default is set 
> depending
> +on whether the processor we are optimizing for supports fast unaligned access
> +or not.  This is an alias for @option{-mstrict-align}.
> +
> +@opindex mvector-strict-align
> +@opindex mno-vector-strict-align
> +@item -mvector-strict-align
> +@itemx -mno-vector-strict-align
> +Do not or do generate unaligned vector memory accesses.  By default,
> +unaligned vector accesses are not generated unless the processor we are
> +optimizing for explicitly supports element-misaligned vector memory access.
> +
> +@opindex mrvv-allow-misalign
> +@item -mrvv-allow-misalign
> +Allow the creation of element-misaligned vector loads and stores irrespective
> +of the current uarch. The default is off.
> +
> +
>  @opindex mcmodel=medlow
>  @item -mcmodel=medlow
>  Generate code for the medium-low code model. The program and its statically
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
> index 49ea3c2cf72..754f84ae0a0 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
> -mrvv-max-lmul=dynamic" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
> -mrvv-max-lmul=dynamic -mno-vector-strict-align" } */
>
>  int
>  x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2)
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
> index 144479324d7..d0a0f4208ee 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 
> -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 
> -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
>
>  #include <stdint-gcc.h>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
> index 13ae8bd3bcf..5a779a9ee75 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 
> -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 
> -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
>
>  #include <stdint-gcc.h>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
> index 1f9fa48264e..e7e4e841bb8 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic 
> -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic 
> -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
>
>  #include <stdint-gcc.h>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c
> index ea6a7cbe2b1..0e5b4522de5 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mno-vector-strict-align" } 
> */
>
>  #include <stdint-gcc.h>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c
> index cb4abeca989..5276e0b2f6c 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 
> -mno-vector-strict-align" } */
>
>  #include <stdint-gcc.h>
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c
> index 1a076cbcd0f..5184a295e16 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
> -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } 
> */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
> -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns 
> -mno-vector-strict-align" } */
>
>  #include <stdlib.h>
>
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index f0f6da52275..e887efbb8f3 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -2034,7 +2034,7 @@ proc check_effective_target_riscv_zvfh_ok { } {
>      # check if we can execute vector insns with the given hardware or
>      # simulator
>      set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
> -    if { [check_runtime ${gcc_march}_exec {
> +    if { [check_runtime ${gcc_march}_zvfh_exec {
>         int main()
>         {
>             asm ("vsetivli zero,8,e16,m1,ta,ma");
> @@ -2047,6 +2047,29 @@ proc check_effective_target_riscv_zvfh_ok { } {
>      return 0
>  }
>
> +# Return 1 if we can load a vector from a 1-byte aligned address.
> +
> +proc check_effective_target_riscv_v_misalign_ok { } {
> +
> +    if { ![check_effective_target_riscv_v_ok] } {
> +       return 0
> +    }
> +
> +    set gcc_march [riscv_get_arch]
> +    if { [check_runtime ${gcc_march}_misalign_exec {
> +         int main() {
> +             unsigned char a[16]
> +               = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
> +             asm ("vsetivli zero,7,e8,m1,ta,ma");
> +             asm ("addi a7,%0,1" : : "r" (a) : "a7" );
> +             asm ("vle8.v v8,0(a7)" : : : "v8");
> +             return 0; } } "-march=${gcc_march}"] } {
> +       return 1
> +    }
> +
> +    return 0
> +}
> +
>  proc riscv_get_arch { } {
>      set gcc_march ""
>      # ??? do we neeed to add more extensions to the list below?
> @@ -8139,7 +8162,6 @@ proc check_effective_target_vect_hw_misalign { } {
>              || ([istarget mips*-*-*] && [et-is-effective-target mips_msa])
>              || ([istarget s390*-*-*]
>                  && [check_effective_target_s390_vx])
> -            || ([istarget riscv*-*-*])
>              || ([istarget loongarch*-*-*])
>              || [istarget amdgcn*-*-*] } {
>           return 1
> @@ -8148,6 +8170,11 @@ proc check_effective_target_vect_hw_misalign { } {
>              && ![check_effective_target_arm_vect_no_misalign] } {
>           return 1
>         }
> +       if { [istarget riscv*-*-*]
> +            && [check_effective_target_riscv_v_misalign_ok] } {
> +           return 1
> +       }
> +
>          return 0
>      }]
>  }
> @@ -11565,6 +11592,9 @@ proc check_vect_support_and_set_flags { } {
>      } elseif [istarget riscv*-*-*] {
>         if [check_effective_target_riscv_v] {
>             set dg-do-what-default run
> +           if [check_effective_target_riscv_v_misalign_ok] {
> +               lappend DEFAULT_VECTCFLAGS "-mno-vector-strict-align"
> +           }
>         } else {
>             foreach item [add_options_for_riscv_v ""] {
>                 lappend DEFAULT_VECTCFLAGS $item
> --
> 2.45.0

Reply via email to