Author: rksimon
Date: Tue Nov 8 11:01:05 2016
New Revision: 286251
URL: http://llvm.org/viewvc/llvm-project?rev=286251&view=rev
Log:
[3.9.1] Merging r283070 - [X86][AVX] Ensure broadcast loads respect dependencies
To allow broadcast loads of a non-zero'th vector element, lowerVectorShuffleAsBroadcast can replace a load with a new load with an adjusted address, but unfortunately we weren't ensuring that the new load respected the same dependencies. This patch adds a TokenFactor and updates all dependencies of the old load to reference the new load instead. Bug found during internal testing. Differential Revision: https://reviews.llvm.org/D25039 As discussed on PR30596 Modified: llvm/branches/release_39/ (props changed) llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll Propchange: llvm/branches/release_39/ ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Tue Nov 8 11:01:05 2016 @@ -1,3 +1,3 @@ /llvm/branches/Apple/Pertwee:110850,110961 /llvm/branches/type-system-rewrite:133420-134817 -/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,277773,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283129 +/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,277773,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283070,283129 Modified: 
llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp?rev=286251&r1=286250&r2=286251&view=diff ============================================================================== --- llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp Tue Nov 8 11:01:05 2016 @@ -8656,6 +8656,17 @@ static SDValue lowerVectorShuffleAsBroad V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); + + // Make sure the newly-created LOAD is in the same position as Ld in + // terms of dependency. We create a TokenFactor for Ld and V, + // and update uses of Ld's output chain to use the TokenFactor. + if (Ld->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(Ld, 1), SDValue(V.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), + SDValue(V.getNode(), 1)); + } } else if (!BroadcastFromReg) { // We can't broadcast from a vector register. return SDValue(); Modified: llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll?rev=286251&r1=286250&r2=286251&view=diff ============================================================================== --- llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll (original) +++ llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll Tue Nov 8 11:01:05 2016 @@ -548,38 +548,40 @@ define <4 x double> @splat_concat4(doubl } ; -; FIXME: When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies. +; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies. 
; define float @broadcast_lifetime() nounwind { ; X32-LABEL: broadcast_lifetime: ; X32: ## BB#0: ; X32-NEXT: pushl %esi -; X32-NEXT: subl $40, %esp +; X32-NEXT: subl $56, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _gfunc +; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 +; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) ## 16-byte Spill ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _gfunc ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 -; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1 -; X32-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; X32-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 16-byte Folded Reload ; X32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ; X32-NEXT: flds {{[0-9]+}}(%esp) -; X32-NEXT: addl $40, %esp +; X32-NEXT: addl $56, %esp ; X32-NEXT: popl %esi ; X32-NEXT: retl ; ; X64-LABEL: broadcast_lifetime: ; X64: ## BB#0: -; X64-NEXT: subq $24, %rsp +; X64-NEXT: subq $40, %rsp ; X64-NEXT: leaq (%rsp), %rdi ; X64-NEXT: callq _gfunc +; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill ; X64-NEXT: leaq (%rsp), %rdi ; X64-NEXT: callq _gfunc ; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm1 -; X64-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; X64-NEXT: addq $24, %rsp +; X64-NEXT: vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload +; X64-NEXT: addq $40, %rsp ; X64-NEXT: retq %1 = alloca <4 x float>, align 16 %2 = alloca <4 x float>, align 16 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits