Issue |
145276
|
Summary |
Some `shufflevector`s should be emitted as a single `shld`/`shrd`-style instruction (e.g. `vpshldq`)
|
Labels |
new issue
|
Assignees |
|
Reporter |
Validark
|
[Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:1,endLineNumber:11,positionColumn:1,positionLineNumber:11,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:'const+std+%3D+@import(%22std%22)%3B%0A%0Aexport+fn+foo(a:+@Vector(8,+u8),+b:+@Vector(8,+u8))+@Vector(8,+u8)+%7B%0A++++return+@shuffle(u8,+a,+b,+%5B_%5Di32%7B+-8,+0,+1,+2,+3,+4,+5,+6+%7D)%3B%0A%7D%0A%0Aexport+fn+bar(a:+@Vector(8,+u8),+b:+@Vector(8,+u8))+@Vector(8,+u8)+%7B%0A++++const+i+%3D+8%3B%0A++++return+@bitCast((@as(u64,+@bitCast(a))+%3C%3C+i)+%7C+(@as(u64,+@bitCast(b))+%3E%3E+(63+-+i+%2B+1)))%3B%0A%7D%0A'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:51.84055022764701,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,libs:!(),options:'-OReleaseFast+-target+x86_64-linux+-mcpu%3Dznver5',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),k:48.15944977235301,l:'4',m:50,n:'0',o:'',s:0,t:'0'),(g:!((h:ir,i:('-fno-discard-value-names':'0',compilerName:'zig+trunk',demangle-symbols:'0',editorid:1,filter-attributes:'0',filter-comments:'0',filter-debug-info:'0',filter-instruction-metadata:'0',fontScale:14,fontUsePx:'0',j:1,selection:(endColumn:2,endLineNumber:5,positionColumn:1,positionLineNumber:1,selectionStartColumn:2,selectionStartLineNumber:5,startColumn:1,startLineNumber:1),treeid:0,wrap:'1'),l:'5',n:'0',o:'LLVM+IR+Viewer+zig+trunk+(Editor+%231,+Compiler+%231)',t:'0')),header:(),l:'4',m:50,n:'0',o:'',s:0,t:'0')),k:48.15944977235301,l:'3',n:'0',o:'',t:'0')),l:'2',
n:'0',o:'',t:'0')),version:4) [LLVM Godbolt](https://llvm.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,selection:(endColumn:21,endLineNumber:3,positionColumn:21,positionLineNumber:3,selectionStartColumn:8,selectionStartLineNumber:3,startColumn:8,startLineNumber:3),source:'define+dso_local+%3C8+x+i8%3E+@foo(%3C8+x+i8%3E+%250,+%3C8+x+i8%3E+%251)+local_unnamed_addr+%7B%0AEntry:%0A++%252+%3D+shufflevector+%3C8+x+i8%3E+%250,+%3C8+x+i8%3E+%251,+%3C8+x+i32%3E+%3Ci32+15,+i32+0,+i32+1,+i32+2,+i32+3,+i32+4,+i32+5,+i32+6%3E%0A++ret+%3C8+x+i8%3E+%252%0A%7D'),l:'5',n:'0',o:'LLVM+IR+source+%231',t:'0')),k:50,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:llctrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,libs:!(),options:'-O3+--mtriple%3Dx86_64-linux+-mcpu%3Dznver5',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+llc+(trunk)+(Editor+%231)',t:'0')),k:50,l:'4',n:'0',o:'',s:0,t:'0')),l:'2',m:100,n:'0',o:'',t:'0')),version:4)
This code:
```llvm
define dso_local <8 x i8> @foo(<8 x i8> %0, <8 x i8> %1) local_unnamed_addr {
Entry:
%2 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <8 x i8> %2
}
```
With llc trunk (`-O3 --mtriple=x86_64-linux -mcpu=znver5`), it compiles to the following:
```asm
.LCPI0_0:
.byte 15 # 0xf
.byte 0 # 0x0
.byte 2 # 0x2
.byte 4 # 0x4
.byte 6 # 0x6
.byte 8 # 0x8
.byte 10 # 0xa
.byte 12 # 0xc
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
foo: # @foo
vpunpcklbw xmm0, xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
vpshufb xmm0, xmm0, xmmword ptr [rip + .LCPI0_0] # xmm0 = xmm0[15,0,2,4,6,8,10,12,u,u,u,u,u,u,u,u]
ret
```
Zig trunk (`-OReleaseFast -target x86_64-linux -mcpu=znver5`), for some reason, ends up with the following for the equivalent `@shuffle`:
```asm
.LCPI0_0:
.byte 23
.byte 0
.byte 1
.byte 2
.byte 3
.byte 4
.byte 5
.byte 6
foo:
vpbroadcastq xmm2, qword ptr [rip + .LCPI0_0]
vpermt2b xmm0, xmm2, xmm1
ret
```
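Because this shuffle is equivalent to `(a << 8) | (b >> 56)` with both vectors viewed as 64-bit integers, it should be a single double-shift instruction: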
```asm
foo:
vpshldq xmm0, xmm0, xmm1, 8
ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs