------- Comment #6 from pinskia at gcc dot gnu dot org 2005-10-06 16:44 ------- Reduced testcase:
/*
 * Reduced compiler testcase (see the bug URL at the end of this message).
 * The exact statement sequence is the point of the testcase, so it is left
 * untouched; only whitespace and comments have been added for readability.
 * NOTE(review): the AltiVec attributes/builtins presumably require a
 * PowerPC target with AltiVec enabled -- confirm the flags in the bug report.
 */

/* Local stand-ins for the <stdint.h> names; int32_t is not referenced below. */
typedef int int32_t;
typedef unsigned char uint8_t;

/* AltiVec vector types: signed/unsigned short and signed/unsigned char. */
typedef __attribute__((altivec(vector__))) signed short vss;
typedef __attribute__((altivec(vector__))) unsigned short vus;
typedef __attribute__((altivec(vector__))) signed char vsc;
typedef __attribute__((altivec(vector__))) unsigned char vuc;

/* Global byte pointer; only Src[16] is read below. Never assigned here. */
uint8_t *Src;
/* Global table of 17 vsc vectors; entries 4..9 are loaded into firs below. */
vsc FIR_Tab_16[17];

/*
 * Runs ten iterations of a fixed sequence of multiply-add steps on the
 * by-value arguments sums1/sums2, using vec_src elements 3..9 and firs
 * (the incoming argument first, then FIR_Tab_16[4..9]).
 * Returns nothing and stores nothing outside its locals/parameters, so none
 * of the computed values are observable by the caller.
 */
void H_Pass_16_Altivec_C(vuc vec_src, vsc firs, vss sums1, vss sums2) {
vss t; /* assigned once per iteration, never read */
vuc tmp;
int H = 10; /* loop counter: the body executes 10 times */
while(H-- > 0) {
/* Element 3: uses whatever firs currently holds (the argument on the first
   iteration, FIR_Tab_16[9] from the previous iteration afterwards). */
tmp = __builtin_vec_splat(vec_src,(3));
t = (vss)__builtin_vec_mergeh(tmp, tmp); /* dead store; same expression is recomputed on the next line */
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp, tmp), __builtin_vec_unpackh(firs), sums1 );
sums2 = __builtin_vec_mladd( (vss)__builtin_vec_mergel(tmp, tmp), __builtin_vec_unpackl(firs), sums2 );
/* Elements 4..7: same high/low step pair, with firs reloaded from the table. */
firs = FIR_Tab_16[4];
tmp = __builtin_vec_splat(vec_src,(4));
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp, tmp), __builtin_vec_unpackh(firs), sums1 );
sums2 = __builtin_vec_mladd( (vss)__builtin_vec_mergel(tmp, tmp), __builtin_vec_unpackl(firs), sums2 );
firs = FIR_Tab_16[5];
tmp = __builtin_vec_splat(vec_src,(5));
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp, tmp), __builtin_vec_unpackh(firs), sums1 );
sums2 = __builtin_vec_mladd( (vss)__builtin_vec_mergel(tmp, tmp), __builtin_vec_unpackl(firs), sums2 );
firs = FIR_Tab_16[6];
tmp = __builtin_vec_splat(vec_src,(6));
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp, tmp), __builtin_vec_unpackh(firs), sums1 );
sums2 = __builtin_vec_mladd( (vss)__builtin_vec_mergel(tmp, tmp), __builtin_vec_unpackl(firs), sums2 );
firs = FIR_Tab_16[7];
tmp = __builtin_vec_splat(vec_src,(7));
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp, tmp), __builtin_vec_unpackh(firs), sums1 );
sums2 = __builtin_vec_mladd( (vss)__builtin_vec_mergel(tmp, tmp), __builtin_vec_unpackl(firs), sums2 );
/* Element 8: only sums1 is updated here; there is no matching sums2 step. */
firs = FIR_Tab_16[8];
tmp = __builtin_vec_splat(vec_src,(8));
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp, tmp), __builtin_vec_unpackh(firs), sums1 );
/* Element 9: after the splat, the byte at the lowest address of tmp is
   overwritten with Src[16] through a char pointer. */
firs = FIR_Tab_16[9];
tmp = __builtin_vec_splat(vec_src,(9));
*((char*)&tmp) = Src[16*1];
sums1 = __builtin_vec_mladd( (vss)__builtin_vec_mergeh(tmp,tmp),__builtin_vec_unpackh(firs),sums1 );
sums2 = __builtin_vec_mladd( (vss)__builtin_vec_mergel(tmp,tmp),__builtin_vec_unpackl(firs),sums2 );
/* tmp is reused as the shift-count operand (built from the constant 5);
   only sums1 is shifted, sums2 is passed to packsu unshifted. */
tmp = (vuc)((vus) __builtin_altivec_vspltish (((5))));
sums1 = __builtin_vec_sra(sums1,(vus)tmp);
/* Result lands in tmp, which is overwritten at the top of the next
   iteration and never read after the loop. */
tmp = __builtin_vec_packsu(sums1,sums2);
}
}
-- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24230