Hi, The signed variants of the qtbl and qtbx intrinsics currently take an int8x<8,16> for their control vector parameter. This should be a uint8x<8,16> parameter.
Fixed as attached and checked against aarch64.exp on aarch64-none-elf with no regressions. Is this OK to commit? I have some similar patches kicking around in my tree; these feel obvious, but I'd like to check that others share that perspective before I go committing anything! Thanks, James --- gcc/ 2013-09-06 James Greenhalgh <james.greenha...@arm.com> * config/aarch64/arm_neon.h (vqtbl<1,2,3,4><q>_s8): Fix control vector parameter type. (vqtbx<1,2,3,4><q>_s8): Likewise. gcc/testsuite/ 2013-09-06 James Greenhalgh <james.greenha...@arm.com> * gcc.target/aarch64/table-intrinsics.c (qtbl_tests8_< ,2,3,4>): Fix control vector parameter type. (qtb_tests8_< ,2,3,4>): Likewise. (qtblq_tests8_< ,2,3,4>): Likewise. (qtbxq_tests8_< ,2,3,4>): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e20d34e..5864f2c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -15973,7 +15973,7 @@ vqtbl1_p8 (poly8x16_t a, uint8x8_t b) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl1_s8 (int8x16_t a, int8x8_t b) +vqtbl1_s8 (int8x16_t a, uint8x8_t b) { int8x8_t result; __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" @@ -16006,7 +16006,7 @@ vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl1q_s8 (int8x16_t a, int8x16_t b) +vqtbl1q_s8 (int8x16_t a, uint8x16_t b) { int8x16_t result; __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" @@ -16028,7 +16028,7 @@ vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx) +vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) { int8x8_t result; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16064,7 +16064,7 @@ vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx) +vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) { int8x16_t result; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16100,7 +16100,7 @@ vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx) +vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) { int8x8_t result; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16136,7 +16136,7 @@ vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx) +vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) { int8x16_t result; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16172,7 +16172,7 @@ vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) } __extension__ 
static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx) +vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) { int8x8_t result; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16209,7 +16209,7 @@ vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx) +vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) { int8x16_t result; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16246,7 +16246,7 @@ vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx) +vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("tbx %0.8b,{%1.16b},%2.8b" @@ -16279,7 +16279,7 @@ vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx) +vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("tbx %0.16b,{%1.16b},%2.16b" @@ -16312,7 +16312,7 @@ vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx) +vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16349,7 +16349,7 @@ vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx) +vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16386,7 +16386,7 @@ vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx3_s8 
(int8x8_t r, int8x16x3_t tab, int8x8_t idx) +vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16423,7 +16423,7 @@ vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx) +vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16460,7 +16460,7 @@ vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx) +vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16497,7 +16497,7 @@ vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx) +vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" diff --git a/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c index 5d53abe..6281cda 100644 --- a/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/table-intrinsics.c @@ -148,7 +148,7 @@ tb_testp8_4 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx) } int8x8_t -qtbl_tests8_ (int8x16_t tab, int8x8_t idx) +qtbl_tests8_ (int8x16_t tab, uint8x8_t idx) { return vqtbl1_s8 (tab, idx); } @@ -166,7 +166,7 @@ qtbl_testp8_ (poly8x16_t tab, uint8x8_t idx) } int8x8_t -qtbl_tests8_2 (int8x16x2_t tab, int8x8_t idx) +qtbl_tests8_2 (int8x16x2_t tab, uint8x8_t idx) { return vqtbl2_s8 (tab, idx); } @@ -184,7 +184,7 @@ qtbl_testp8_2 (poly8x16x2_t tab, uint8x8_t idx) } int8x8_t -qtbl_tests8_3 (int8x16x3_t tab, int8x8_t idx) 
+qtbl_tests8_3 (int8x16x3_t tab, uint8x8_t idx) { return vqtbl3_s8 (tab, idx); } @@ -202,7 +202,7 @@ qtbl_testp8_3 (poly8x16x3_t tab, uint8x8_t idx) } int8x8_t -qtbl_tests8_4 (int8x16x4_t tab, int8x8_t idx) +qtbl_tests8_4 (int8x16x4_t tab, uint8x8_t idx) { return vqtbl4_s8 (tab, idx); } @@ -220,7 +220,7 @@ qtbl_testp8_4 (poly8x16x4_t tab, uint8x8_t idx) } int8x8_t -qtb_tests8_ (int8x8_t r, int8x16_t tab, int8x8_t idx) +qtb_tests8_ (int8x8_t r, int8x16_t tab, uint8x8_t idx) { return vqtbx1_s8 (r, tab, idx); } @@ -238,7 +238,7 @@ qtb_testp8_ (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) } int8x8_t -qtb_tests8_2 (int8x8_t r, int8x16x2_t tab, int8x8_t idx) +qtb_tests8_2 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) { return vqtbx2_s8 (r, tab, idx); } @@ -256,7 +256,7 @@ qtb_testp8_2 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) } int8x8_t -qtb_tests8_3 (int8x8_t r, int8x16x3_t tab, int8x8_t idx) +qtb_tests8_3 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) { return vqtbx3_s8 (r, tab, idx); } @@ -274,7 +274,7 @@ qtb_testp8_3 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) } int8x8_t -qtb_tests8_4 (int8x8_t r, int8x16x4_t tab, int8x8_t idx) +qtb_tests8_4 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) { return vqtbx4_s8 (r, tab, idx); } @@ -292,7 +292,7 @@ qtb_testp8_4 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) } int8x16_t -qtblq_tests8_ (int8x16_t tab, int8x16_t idx) +qtblq_tests8_ (int8x16_t tab, uint8x16_t idx) { return vqtbl1q_s8 (tab, idx); } @@ -310,7 +310,7 @@ qtblq_testp8_ (poly8x16_t tab, uint8x16_t idx) } int8x16_t -qtblq_tests8_2 (int8x16x2_t tab, int8x16_t idx) +qtblq_tests8_2 (int8x16x2_t tab, uint8x16_t idx) { return vqtbl2q_s8 (tab, idx); } @@ -328,7 +328,7 @@ qtblq_testp8_2 (poly8x16x2_t tab, uint8x16_t idx) } int8x16_t -qtblq_tests8_3 (int8x16x3_t tab, int8x16_t idx) +qtblq_tests8_3 (int8x16x3_t tab, uint8x16_t idx) { return vqtbl3q_s8 (tab, idx); } @@ -346,7 +346,7 @@ qtblq_testp8_3 (poly8x16x3_t tab, uint8x16_t idx) } int8x16_t -qtblq_tests8_4 (int8x16x4_t 
tab, int8x16_t idx) +qtblq_tests8_4 (int8x16x4_t tab, uint8x16_t idx) { return vqtbl4q_s8 (tab, idx); } @@ -364,7 +364,7 @@ qtblq_testp8_4 (poly8x16x4_t tab, uint8x16_t idx) } int8x16_t -qtbxq_tests8_ (int8x16_t r, int8x16_t tab, int8x16_t idx) +qtbxq_tests8_ (int8x16_t r, int8x16_t tab, uint8x16_t idx) { return vqtbx1q_s8 (r, tab, idx); } @@ -382,7 +382,7 @@ qtbxq_testp8_ (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) } int8x16_t -qtbxq_tests8_2 (int8x16_t r, int8x16x2_t tab, int8x16_t idx) +qtbxq_tests8_2 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) { return vqtbx2q_s8 (r, tab, idx); } @@ -400,7 +400,7 @@ qtbxq_testp8_2 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) } int8x16_t -qtbxq_tests8_3 (int8x16_t r, int8x16x3_t tab, int8x16_t idx) +qtbxq_tests8_3 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) { return vqtbx3q_s8 (r, tab, idx); } @@ -418,7 +418,7 @@ qtbxq_testp8_3 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) } int8x16_t -qtbxq_tests8_4 (int8x16_t r, int8x16x4_t tab, int8x16_t idx) +qtbxq_tests8_4 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) { return vqtbx4q_s8 (r, tab, idx); }