On Thu, Nov 18, 2021 at 7:17 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Make the cost of storing 256/512-bit SSE registers equal to the cost of
> unaligned stores to avoid odd alignment peeling.
>
> Impact for SPEC2017 on CLX:
> fprate:
>   503.bwaves_r    BuildSame
>   507.cactuBSSN_r     -0.22
>   508.namd_r          -0.02
>   510.parest_r        -0.28
>   511.povray_r        -0.20
>   519.lbm_r       BuildSame
>   521.wrf_r           -0.58
>   526.blender_r       -0.30
>   527.cam4_r           1.07
>   538.imagick_r        0.01
>   544.nab_r           -0.09
>   549.fotonik3d_r BuildSame
>   554.roms_r      BuildSame
> intrate:
>   500.perlbench_r     -0.25
>   502.gcc_r           -0.15
>   505.mcf_r       BuildSame
>   520.omnetpp_r        1.03
>   523.xalancbmk_r     -0.13
>   525.x264_r          -0.05
>   531.deepsjeng_r     -0.27
>   541.leela_r         -0.24
>   548.exchange2_r     -0.06
>   557.xz_r            -0.10
>   999.specrand_ir      2.69
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ready to push to trunk.

OK.

> gcc/ChangeLog:
>
>         PR target/102543
>         * config/i386/x86-tune-costs.h (skylake_cost): Reduce cost of
>         storing 256/512-bit SSE register to be equal to cost of
>         unaligned store to avoid odd alignment peeling.
>         (icelake_cost): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr102543.c: New test.
> ---
>  gcc/config/i386/x86-tune-costs.h         |  4 +--
>  gcc/testsuite/gcc.target/i386/pr102543.c | 35 ++++++++++++++++++++++++
>  2 files changed, 37 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102543.c
>
> diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
> index dd5563d2e64..60d50c97fca 100644
> --- a/gcc/config/i386/x86-tune-costs.h
> +++ b/gcc/config/i386/x86-tune-costs.h
> @@ -1903,7 +1903,7 @@ struct processor_costs skylake_cost = {
>    {6, 6, 6},                           /* cost of storing integer registers */
>    {6, 6, 6, 10, 20},                   /* cost of loading SSE register
>                                            in 32bit, 64bit, 128bit, 256bit and 512bit */
> -  {8, 8, 8, 12, 24},                   /* cost of storing SSE register
> +  {8, 8, 8, 8, 16},                    /* cost of storing SSE register
>                                            in 32bit, 64bit, 128bit, 256bit and 512bit */
>    {6, 6, 6, 10, 20},                   /* cost of unaligned loads.  */
>    {8, 8, 8, 8, 16},                    /* cost of unaligned stores.  */
> @@ -2029,7 +2029,7 @@ struct processor_costs icelake_cost = {
>    {6, 6, 6},                           /* cost of storing integer registers */
>    {6, 6, 6, 10, 20},                   /* cost of loading SSE register
>                                            in 32bit, 64bit, 128bit, 256bit and 512bit */
> -  {8, 8, 8, 12, 24},                   /* cost of storing SSE register
> +  {8, 8, 8, 8, 16},                    /* cost of storing SSE register
>                                            in 32bit, 64bit, 128bit, 256bit and 512bit */
>    {6, 6, 6, 10, 20},                   /* cost of unaligned loads.  */
>    {8, 8, 8, 8, 16},                    /* cost of unaligned stores.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102543.c b/gcc/testsuite/gcc.target/i386/pr102543.c
> new file mode 100644
> index 00000000000..893eb9a5902
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102543.c
> @@ -0,0 +1,35 @@
> +/* PR target/102543 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -march=skylake-avx512 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-not "MEM\\\[" "optimized" } } */
> +
> +struct a
> +{
> +  int a[100];
> +};
> +typedef struct a misaligned_t __attribute__ ((aligned (8)));
> +typedef struct a aligned_t __attribute__ ((aligned (32)));
> +
> +__attribute__ ((used))
> +__attribute__ ((noinline))
> +void
> +t(void *a, int misaligned, aligned_t *d)
> +{
> +  int i,v;
> +  for (i=0;i<100;i++)
> +    {
> +      if (misaligned)
> +       v=((misaligned_t *)a)->a[i];
> +      else
> +       v=((aligned_t *)a)->a[i];
> +      d->a[i]+=v;
> +    }
> +}
> +struct b {int v; misaligned_t m;aligned_t aa;} b;
> +aligned_t d;
> +int
> +main()
> +{
> +  t(&b.m, 1, &d);
> +  return 0;
> +}
> --
> 2.18.2
>

Reply via email to