https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108757
--- Comment #12 from Chip Kerchner <chip.kerchner at ibm dot com> ---
Here is an example of the original problem:
// Forces inlining: GCC's always_inline attribute combined with `inline`.
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
// Vector-of-float type passed to and returned from the load/store helpers.
typedef __vector float Packet4f;
// Index is size_t, i.e. unsigned, so all loop arithmetic on it is unsigned.
// NOTE(review): no #include lines are shown; presumably <altivec.h> and
// <cstddef> are required to build this snippet -- confirm.
typedef size_t Index;
// Loads one Packet4f starting at `from` by calling vec_xl with a byte
// offset of 0 and returns it.
// The const_cast only strips constness so the pointer matches the argument
// type passed to vec_xl; nothing is written through it here.
// NOTE(review): the trailing "u" presumably means "unaligned" -- confirm.
EIGEN_ALWAYS_INLINE Packet4f ploadu(const float* from)
{
return vec_xl(0, const_cast<float*>(from));
}
// Stores the Packet4f `from` to memory starting at `to` by calling vec_xst
// with a byte offset of 0.
// NOTE(review): the trailing "u" presumably means "unaligned" -- confirm.
EIGEN_ALWAYS_INLINE void pstoreu(float* to, const Packet4f &from)
{
vec_xst(from, 0, to);
}
// Copies floats from `src` to `result` one Packet4f at a time, advancing the
// index by 4 per iteration.
//   rows   - number of elements to process; elements past the last full
//            group of 4 are not touched by this loop.
//   src    - source pointer, read via ploadu.
//   result - destination pointer, written via pstoreu.
void convert(Index rows, float*src, float *result)
{
// Index is an unsigned size_t, so `i + 4 <= rows` is evaluated in unsigned
// arithmetic. This loop condition is the part of the reproducer whose
// trip-count computation is being reported; keep it exactly as written.
for(Index i = 0; i + 4 <= rows; i+=4) {
// The "+ 0" offsets are no-ops kept from the original code.
Packet4f r32_0 = ploadu(src + i + 0);
pstoreu(result + i + 0, r32_0);
}
}
And the output (with annotations on the lines in question):
cmpldi 0,3,3
blelr 0
addi 3,3,-4 <- i = rows - 4
li 9,0
srdi 3,3,2 <- i >>= 2
addi 8,3,1 <- i = i + 1
andi. 7,8,0x3
mr 10,8
beq 0,.L10
cmpdi 0,7,1
beq 0,.L14
cmpdi 0,7,2
beq 0,.L15
lxv 0,0(4)
mr 8,3
li 9,16
stxv 0,0(5)
.L15:
lxvx 0,4,9
addi 8,8,-1
stxvx 0,5,9
addi 9,9,16
.L14:
lxvx 0,4,9
cmpdi 0,8,1
stxvx 0,5,9
addi 9,9,16
beqlr 0
.L10:
srdi 10,10,2
mtctr 10
.L3:
lxvx 0,4,9
addi 10,9,16
addi 7,9,32
addi 8,9,48
stxvx 0,5,9
lxvx 0,4,10
addi 9,9,64
stxvx 0,5,10
lxvx 0,4,7
stxvx 0,5,7
lxvx 0,4,8
stxvx 0,5,8
bdnz .L3
blr
In this case the 3 annotated lines can be replaced with a simple `srdi 8,3,2`.