http://gcc.gnu.org/bugzilla/show_bug.cgi?id=51980

--- Comment #4 from Ramana Radhakrishnan <ramana at gcc dot gnu.org> 2012-03-30 
07:58:49 UTC ---
Your testcase is broken - it doesn't honour reinterpret_casts properly. This
is a better testcase.

#include <arm_neon.h>

uint32x4_t sqrlen4D_16u8( const uint8x16_t A, const uint8x16_t B )
{
 const uint8x16_t absAB = vabdq_u8( A, B );
 const uint16x8_t square_l = vmull_u8( vget_low_u8( absAB ), vget_low_u8( absAB
) );
 const uint16x8_t square_h = vmull_u8( vget_high_u8( absAB ), vget_high_u8(
absAB ) );
 const uint32x4x2_t rgrgrgrg_babababa = vuzpq_u32( vreinterpretq_u32_u16
(square_l), vreinterpretq_u32_u16 (square_h) );
 const uint16x8_t rgrgrgrg = vreinterpretq_u16_u32 (rgrgrgrg_babababa.val[0]);
 const uint16x8_t babababa = vreinterpretq_u16_u32 (rgrgrgrg_babababa.val[1]);
 const uint32x4_t rpg_rpg_rpg_rpg = vpaddlq_u16( rgrgrgrg );
 const uint32x4_t dp = vpadalq_u16( rpg_rpg_rpg_rpg, babababa );
 return ( dp );
}

Reply via email to