On Mon, Jan 23, 2017 at 11:23:48AM +0000, James Greenhalgh wrote:
>
> Hi,
>
> As subject, we have an oversight in aarch64_simd_container_mode for
> HFmode inputs. This results in trunk only autovectorizing to a 64-bit vector,
> rather than a full 128-bit vector.
>
> The fix is obvious, we just need to handle HFmode, and return an
> appropriate vector mode.
>
> Tested on aarch64-none-elf with no issues. This patch looks low risk
> for this development stage to me, though it fixes an oversight rather
> than a regression.
*Ping* Thanks, James > gcc/ > > 2017-01-23 James Greenhalgh <james.greenha...@arm.com> > > * config/aarch64/aarch64.c (aarch64_simd_container_mode): Handle > HFmode. > > gcc/testsuite/ > > 2017-01-23 James Greenhalgh <james.greenha...@arm.com> > > * gcc.target/aarch64/vect_fp16_1.c: New. > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 0cf7d12..7efc1f2 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -10777,6 +10777,8 @@ aarch64_simd_container_mode (machine_mode mode, > unsigned width) > return V2DFmode; > case SFmode: > return V4SFmode; > + case HFmode: > + return V8HFmode; > case SImode: > return V4SImode; > case HImode: > @@ -10793,6 +10795,8 @@ aarch64_simd_container_mode (machine_mode mode, > unsigned width) > { > case SFmode: > return V2SFmode; > + case HFmode: > + return V4HFmode; > case SImode: > return V2SImode; > case HImode: > diff --git a/gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c > b/gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c > new file mode 100644 > index 0000000..da0cd81 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c > @@ -0,0 +1,30 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-vect-cost-model" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16 > + types. */ > + > +/* Enable ARMv8.2-A+fp16 so we have access to the vector instructions. 
*/ > +#pragma GCC target ("arch=armv8.2-a+fp16") > + > +_Float16 > +sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a, > + _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b, > + _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] + c[i]; > +} > + > +_Float16 > +sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a, > + __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b, > + __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] + c[i]; > +} > + > +/* Two FADD operations on "8h" data widths, one from sum_Float16, one from > + sum_fp16. */ > +/* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */