LGTM, pushed, thanks.
On Wed, Mar 19, 2014 at 11:41:54AM +0800, Ruiling Song wrote: > Per OCL Spec, the computed address (p+offset*n) is 8-bit aligned for char, > and 16-bit aligned for short in vloadn & vstoren. That is, we cannot assume > that > vload4 with a char pointer is 4-byte aligned. The previous implementation > makes > Clang generate a load or store with alignment 4, which is in fact only > alignment 1. > > We need to find another way to optimize vloadn. > But before that, let's keep vloadn and vstoren working correctly. > This could fix the regression issue caused by byte/short optimization. > > Signed-off-by: Ruiling Song <[email protected]> > --- > backend/src/ocl_stdlib.tmpl.h | 60 > ++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 56 insertions(+), 4 deletions(-) > > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index e3ac632..25f2ff7 100755 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -3882,10 +3882,59 @@ INLINE_OVERLOADABLE TYPE##3 vload3(size_t offset, > const SPACE TYPE *p) { \ > DECL_UNTYPED_RD_ALL_SPACE(TYPE, __constant) \ > DECL_UNTYPED_RW_ALL_SPACE(TYPE, __private) > > -DECL_UNTYPED_RW_ALL(char) > -DECL_UNTYPED_RW_ALL(uchar) > -DECL_UNTYPED_RW_ALL(short) > -DECL_UNTYPED_RW_ALL(ushort) > +#define DECL_BYTE_RD_SPACE(TYPE, SPACE) \ > +INLINE_OVERLOADABLE TYPE##2 vload2(size_t offset, const SPACE TYPE *p) { \ > + return (TYPE##2)(*(p+2*offset), *(p+2*offset+1)); \ > +} \ > +INLINE_OVERLOADABLE TYPE##3 vload3(size_t offset, const SPACE TYPE *p) { \ > + return (TYPE##3)(*(p+3*offset), *(p+3*offset+1), *(p+3*offset+2)); \ > +} \ > +INLINE_OVERLOADABLE TYPE##4 vload4(size_t offset, const SPACE TYPE *p) { \ > + return (TYPE##4)(vload2(2*offset, p), vload2(2*offset, p+2)); \ > +} \ > +INLINE_OVERLOADABLE TYPE##8 vload8(size_t offset, const SPACE TYPE *p) { \ > + return (TYPE##8)(vload4(2*offset, p), vload4(2*offset, p+4)); \ > +} \ > +INLINE_OVERLOADABLE TYPE##16 vload16(size_t offset, const SPACE 
TYPE *p) { \ > + return (TYPE##16)(vload8(2*offset, p), vload8(2*offset, p+8)); \ > +} > + > +#define DECL_BYTE_WR_SPACE(TYPE, SPACE) \ > +INLINE_OVERLOADABLE void vstore2(TYPE##2 v, size_t offset, SPACE TYPE *p) {\ > + *(p + 2 * offset) = v.s0; \ > + *(p + 2 * offset + 1) = v.s1; \ > +} \ > +INLINE_OVERLOADABLE void vstore3(TYPE##3 v, size_t offset, SPACE TYPE *p) {\ > + *(p + 3 * offset) = v.s0; \ > + *(p + 3 * offset + 1) = v.s1; \ > + *(p + 3 * offset + 2) = v.s2; \ > +} \ > +INLINE_OVERLOADABLE void vstore4(TYPE##4 v, size_t offset, SPACE TYPE *p) { \ > + vstore2(v.lo, 2*offset, p); \ > + vstore2(v.hi, 2*offset, p+2); \ > +} \ > +INLINE_OVERLOADABLE void vstore8(TYPE##8 v, size_t offset, SPACE TYPE *p) { \ > + vstore4(v.lo, 2*offset, p); \ > + vstore4(v.hi, 2*offset, p+4); \ > +} \ > +INLINE_OVERLOADABLE void vstore16(TYPE##16 v, size_t offset, SPACE TYPE *p) > { \ > + vstore8(v.lo, 2*offset, p); \ > + vstore8(v.hi, 2*offset, p+8); \ > +} > + > +#define DECL_BYTE_RW_ALL(TYPE) \ > + DECL_BYTE_RD_SPACE(TYPE, __global) \ > + DECL_BYTE_RD_SPACE(TYPE, __local) \ > + DECL_BYTE_RD_SPACE(TYPE, __private) \ > + DECL_BYTE_RD_SPACE(TYPE, __constant) \ > + DECL_BYTE_WR_SPACE(TYPE, __global) \ > + DECL_BYTE_WR_SPACE(TYPE, __local) \ > + DECL_BYTE_WR_SPACE(TYPE, __private) > + > +DECL_BYTE_RW_ALL(char) > +DECL_BYTE_RW_ALL(uchar) > +DECL_BYTE_RW_ALL(short) > +DECL_BYTE_RW_ALL(ushort) > DECL_UNTYPED_RW_ALL(int) > DECL_UNTYPED_RW_ALL(uint) > DECL_UNTYPED_RW_ALL(long) > @@ -3900,6 +3949,9 @@ DECL_UNTYPED_RW_ALL(double) > #undef DECL_UNTYPED_RD_SPACE_N > #undef DECL_UNTYPED_V3_SPACE > #undef DECL_UNTYPED_RDV3_SPACE > +#undef DECL_BYTE_RD_SPACE > +#undef DECL_BYTE_WR_SPACE > +#undef DECL_BYTE_RW_ALL > > PURE CONST float __gen_ocl_f16to32(short h); > PURE CONST short __gen_ocl_f32to16(float f); > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet 
_______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
