Thank you Oleh!

Upon checking the generated assembly, I noticed that although it was 
compiled for the AVX2 target, it doesn't use YMM registers at all. Would 
you know why? I compiled it with -O2.

*AddVec4sProper_avx2:*
00007FF6F9E167E0  test        r8d,r8d  
00007FF6F9E167E3  jle         AddVec4sProper_avx2+0B9h (07FF6F9E16899h)  
00007FF6F9E167E9  lea         eax,[r8-1]  
00007FF6F9E167ED  mov         r9d,r8d  
00007FF6F9E167F0  and         r9d,3  
00007FF6F9E167F4  cmp         eax,3  
00007FF6F9E167F7  jae         AddVec4sProper_avx2+26h (07FF6F9E16806h)  
00007FF6F9E167F9  xor         r10d,r10d  
00007FF6F9E167FC  test        r9d,r9d  
00007FF6F9E167FF  jne         AddVec4sProper_avx2+90h (07FF6F9E16870h)  
00007FF6F9E16801  jmp         AddVec4sProper_avx2+0B9h (07FF6F9E16899h)  
00007FF6F9E16806  sub         r8d,r9d  
00007FF6F9E16809  mov         eax,30h  
00007FF6F9E1680E  xor         r10d,r10d  
00007FF6F9E16811  vmovss      xmm0,dword ptr [__real@3f800000 (07FF6FA0EAF20h)]  
00007FF6F9E16819  nop         dword ptr [rax]  
00007FF6F9E16820  vmovaps     xmm1,xmmword ptr [rdx+rax-30h]  
00007FF6F9E16826  vaddss      xmm1,xmm1,xmm0  
00007FF6F9E1682A  vmovaps     xmmword ptr [rcx+rax-30h],xmm1  
00007FF6F9E16830  vmovaps     xmm1,xmmword ptr [rdx+rax-20h]  
00007FF6F9E16836  vaddss      xmm1,xmm1,xmm0  
00007FF6F9E1683A  vmovaps     xmmword ptr [rcx+rax-20h],xmm1  
00007FF6F9E16840  vmovaps     xmm1,xmmword ptr [rdx+rax-10h]  
00007FF6F9E16846  vaddss      xmm1,xmm1,xmm0  
00007FF6F9E1684A  vmovaps     xmmword ptr [rcx+rax-10h],xmm1  
00007FF6F9E16850  vmovaps     xmm1,xmmword ptr [rdx+rax]  
00007FF6F9E16855  vaddss      xmm1,xmm1,xmm0  
00007FF6F9E16859  vmovaps     xmmword ptr [rcx+rax],xmm1  
00007FF6F9E1685E  add         r10,4  
00007FF6F9E16862  add         rax,40h  
00007FF6F9E16866  cmp         r8d,r10d  
00007FF6F9E16869  jne         AddVec4sProper_avx2+40h (07FF6F9E16820h)  
00007FF6F9E1686B  test        r9d,r9d  
00007FF6F9E1686E  je          AddVec4sProper_avx2+0B9h (07FF6F9E16899h)  
00007FF6F9E16870  shl         r10,4  
00007FF6F9E16874  neg         r9d  
00007FF6F9E16877  vmovss      xmm0,dword ptr [__real@3f800000 (07FF6FA0EAF20h)]  
00007FF6F9E1687F  nop  
00007FF6F9E16880  vmovaps     xmm1,xmmword ptr [rdx+r10]  
00007FF6F9E16886  vaddss      xmm1,xmm1,xmm0  
00007FF6F9E1688A  vmovaps     xmmword ptr [rcx+r10],xmm1  
00007FF6F9E16890  add         r10,10h  
00007FF6F9E16894  inc         r9d  
00007FF6F9E16897  jne         AddVec4sProper_avx2+0A0h (07FF6F9E16880h)  
00007FF6F9E16899  ret  
00007FF6F9E1689A  nop         word ptr [rax+rax]  

On Saturday, March 21, 2020 at 12:21:43 PM UTC-5, Oleh Nechaev wrote:
>
> struct Vector4SOA
> {
>     float<4> V;
> };
>
> export void Test(uniform Vector4SOA outs[], uniform Vector4SOA ins[], 
> uniform int count)
> {
>     for (uniform int i=0; i< count ; ++i)
>     {
>         uniform Vector4SOA vv = ins[i];
>         vv.V.x++; // builtin access by x y z w and r g b a
>         outs[i] = vv;
>     }
> }
>  
>

-- 
You received this message because you are subscribed to the Google Groups 
"Intel SPMD Program Compiler Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/ispc-users/cf0af12b-d84a-463c-be09-c8b35b3baf81%40googlegroups.com.

Reply via email to