[llvm-branch-commits] [llvm] 3286126 - Add flang to export.sh so it gets source tarballs in releases
Author: Hans Wennborg
Date: 2020-07-31T17:23:43+02:00
New Revision: 3286126de175755a887e7fa335583811075be4f7
URL:
https://github.com/llvm/llvm-project/commit/3286126de175755a887e7fa335583811075be4f7
DIFF:
https://github.com/llvm/llvm-project/commit/3286126de175755a887e7fa335583811075be4f7.diff
LOG: Add flang to export.sh so it gets source tarballs in releases
(cherry picked from commit 9853786ce39b9510eeb2688baaef7a364d58e113)
Added:
Modified:
llvm/utils/release/export.sh
Removed:
diff --git a/llvm/utils/release/export.sh b/llvm/utils/release/export.sh
index 02a77afd0533..c3277de38b53 100755
--- a/llvm/utils/release/export.sh
+++ b/llvm/utils/release/export.sh
@@ -13,7 +13,7 @@
set -e
-projects="llvm clang test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp libunwind"
+projects="llvm clang test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp libunwind flang"
release=""
rc=""
[llvm-branch-commits] [llvm] fd2d5a0 - [AArch64][SVE] Correctly allocate scavenging slot in presence of SVE.
Author: Sander de Smalen
Date: 2020-07-31T17:27:46+02:00
New Revision: fd2d5a0c4cdc9ccb0b88f264ae452e3a0e8dcc09
URL:
https://github.com/llvm/llvm-project/commit/fd2d5a0c4cdc9ccb0b88f264ae452e3a0e8dcc09
DIFF:
https://github.com/llvm/llvm-project/commit/fd2d5a0c4cdc9ccb0b88f264ae452e3a0e8dcc09.diff
LOG: [AArch64][SVE] Correctly allocate scavenging slot in presence of SVE.
This patch addresses two issues:
* Forces the availability of the base-pointer (x19) when the frame has
both scalable vectors and variable-length arrays. Otherwise it will
be expensive to access non-SVE locals.
* In the presence of SVE stack objects, it allocates the emergency
scavenging slot close to the SP, so that it can be accessed from the
SP or the BP if available. Accessing it from the frame pointer would
otherwise require an extra register, because of the mix of scalable
and non-scalable addressing modes (see the sketch after this list).
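As a rough illustration of the first point, here is a small C++ fragment
written for this summary (the function and names are invented; it is not
code from the patch). A frame like this one, mixing a variable-sized
object with a scalable SVE value, is what now reserves x19 as a base
pointer:

#include <arm_sve.h>

int consume(int *buf, svint32_t v); // hypothetical helper, defined elsewhere

int mixed_frame(int n, svint32_t v) {
  // Variable-sized object -> MFI.hasVarSizedObjects() is true.
  int *buf = (int *)__builtin_alloca(n * sizeof(int));
  // Scalable value that may be spilled to an SVE stack slot, giving the
  // frame a non-zero SVE stack size.
  svint32_t local = svadd_s32_x(svptrue_b32(), v, v);
  return consume(buf, local);
}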
Reviewers: efriedma, ostannard, cameron.mcinally, rengolin, david-arm
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D70174
(cherry picked from commit bef56f7fe2382ed1476aa67a55626b364635b44e)
Added:
llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
Modified:
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/test/CodeGen/AArch64/framelayout-sve.mir
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index de1ae4759210..83a488afc797 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -353,6 +353,15 @@ bool AArch64RegisterInfo::hasBasePointer(const
MachineFunction &MF) const {
if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
if (needsStackRealignment(MF))
return true;
+
+if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // Frames that have variable sized objects and scalable SVE objects,
+ // should always use a basepointer.
+ if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+return true;
+}
+
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -389,8 +398,15 @@ AArch64RegisterInfo::useFPForScavengingIndex(const
MachineFunction &MF) const {
// (closer to SP).
//
// The beginning works most reliably if we have a frame pointer.
+ // In the presence of any non-constant space between FP and locals,
+ // (e.g. in case of stack realignment or a scalable SVE area), it is
+ // better to use SP or BP.
const AArch64FrameLowering &TFI = *getFrameLowering(MF);
- return TFI.hasFP(MF);
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
+ AFI->hasCalculatedStackSizeSVE()) &&
+ "Expected SVE area to be calculated by this point");
+ return TFI.hasFP(MF) && !needsStackRealignment(MF) &&
!AFI->getStackSizeSVE();
}
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
new file mode 100644
index ..d1252435f874
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
@@ -0,0 +1,27 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - |
FileCheck %s
+---
+# This test verifies that the emergency scavenging slot is located near
+# the SP when the stack is realigned.
+name: LateScavengingSlotRealignment
+# CHECK-LABEL: name: LateScavengingSlotRealignment
+# CHECK: bb.0:
+# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 3
+# CHECK-NEXT: $[[SCRATCH]] = ADDXri $sp, 40, 0
+# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 4095
+# CHECK-NEXT: $[[SCRATCH]] = LDRXui $sp, 3
+# CHECK: bb.1:
+tracksRegLiveness: true
+frameInfo:
+ isFrameAddressTaken: true
+stack:
+ - { id: 0, size:16, alignment: 16 }
+ - { id: 1, size: 32768, alignment: 32 }
+body: |
+ bb.0:
+liveins: $x0, $x8
+STRXui $x0, %stack.0, 0
+B %bb.1
+ bb.1:
+liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12,
$x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25,
$x26, $x27, $x28, $lr
+RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22,
implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27,
implicit $x28, implicit $lr
+...
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
new file mode 100644
index ..a366744d8fa2
--- /de
[llvm-branch-commits] [llvm] 5596693 - [AArch64][SVE] Don't align the last SVE callee save.
Author: Sander de Smalen
Date: 2020-07-31T17:27:48+02:00
New Revision: 5596693504af263d53d6676ec6f0f4669ac706b0
URL:
https://github.com/llvm/llvm-project/commit/5596693504af263d53d6676ec6f0f4669ac706b0
DIFF:
https://github.com/llvm/llvm-project/commit/5596693504af263d53d6676ec6f0f4669ac706b0.diff
LOG: [AArch64][SVE] Don't align the last SVE callee save.
Instead of aligning the last callee-saved-register slot to the stack
alignment (16 bytes), just align the SVE callee-saved block. This also
simplifies the code that allocates space for the callee-saves.
This change is needed to make sure that the offset at which a callee-saved
register is spilled corresponds to the offset used for e.g. unwind call
frame instructions.
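As a sketch of the new rule (standalone C++ written for this summary; the
helper only mirrors the idea, it is not the LLVM implementation), offsets
are assigned slot by slot with no per-slot padding, and the whole SVE
callee-save block is rounded up once at the end:

#include <cstdint>
#include <initializer_list>

// Round x up to the next multiple of a (what llvm::alignTo does for powers of two).
constexpr int64_t alignUp(int64_t x, int64_t a) { return (x + a - 1) / a * a; }

// Slot sizes are in "scaled" bytes (multiplied by vscale at runtime).
int64_t sveCalleeSaveBlockSize(std::initializer_list<int64_t> slotSizes) {
  int64_t Offset = 0;
  for (int64_t Sz : slotSizes)
    Offset += Sz;             // no 16-byte padding of the last slot any more
  return alignUp(Offset, 16); // align the block as a whole, e.g. {16, 2} -> 32
}

Because the individual slots are no longer padded, the offset recorded for
each callee-saved register matches the offset described by the unwind
information.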
Reviewers: efriedma, paulwalker-arm, david-arm, rengolin
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D84042
(cherry picked from commit 26b4ef3694973ea2fa656d3d3a7f67f16f135654)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/test/CodeGen/AArch64/framelayout-sve.mir
llvm/test/CodeGen/AArch64/sve-trunc.ll
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index cc563dd70632..1b49c692f293 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1192,7 +1192,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction
&MF,
// Process the SVE callee-saves to determine what space needs to be
// allocated.
- if (AFI->getSVECalleeSavedStackSize()) {
+ if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
@@ -1200,11 +1200,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction
&MF,
++MBBI;
CalleeSavesEnd = MBBI;
-int64_t OffsetToFirstCalleeSaveFromSP =
-MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
-StackOffset OffsetToCalleeSavesFromSP =
-StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
-AllocateBefore -= OffsetToCalleeSavesFromSP;
+AllocateBefore = {CalleeSavedSize, MVT::nxv1i8};
AllocateAfter = SVEStackSize - AllocateBefore;
}
@@ -1582,7 +1578,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction
&MF,
// deallocated.
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
- if (AFI->getSVECalleeSavedStackSize()) {
+ if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
RestoreBegin = std::prev(RestoreEnd);;
while (IsSVECalleeSave(RestoreBegin) &&
RestoreBegin != MBB.begin())
@@ -1592,12 +1588,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction
&MF,
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
-int64_t OffsetToFirstCalleeSaveFromSP =
-MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
-StackOffset OffsetToCalleeSavesFromSP =
-StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
-DeallocateBefore = OffsetToCalleeSavesFromSP;
-DeallocateAfter = SVEStackSize - DeallocateBefore;
+StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8};
+DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
+DeallocateAfter = CalleeSavedSizeAsOffset;
}
// Deallocate the SVE area.
@@ -2612,9 +2605,6 @@ static int64_t
determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
-// Make sure to align the last callee save slot.
-MFI.setObjectAlignment(MaxCSFrameIndex, Align(16));
-
// Assign offsets to the callee save slots.
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
@@ -2624,6 +2614,9 @@ static int64_t
determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
}
}
+ // Ensure that the Callee-save area is aligned to 16bytes.
+ Offset = alignTo(Offset, Align(16U));
+
// Create a buffer of SVE objects to allocate and sort it.
SmallVector ObjectsToAllocate;
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 575c839fbd15..75a65a6ad522 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -573,7 +573,7 @@ body: |
# CHECK-NEXT: stack-id: sve-vec, callee-saved-register: '$z23',
# CHECK:- { id: 8, name: '', type: spill-slot, offset: -34, size:
[llvm-branch-commits] [llvm] 75e5d4f - [CodeGen] Remove calls to getVectorNumElements in DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR
Author: David Sherwood
Date: 2020-07-31T17:27:49+02:00
New Revision: 75e5d4f42ad9556f5d53bb2984a23082d6c6a830
URL:
https://github.com/llvm/llvm-project/commit/75e5d4f42ad9556f5d53bb2984a23082d6c6a830
DIFF:
https://github.com/llvm/llvm-project/commit/75e5d4f42ad9556f5d53bb2984a23082d6c6a830.diff
LOG: [CodeGen] Remove calls to getVectorNumElements in
DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR
In DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR I have replaced
calls to getVectorNumElements with getVectorMinNumElements, since
this code path works for both fixed and scalable vector types. For
scalable vectors the index will be multiplied by VSCALE.
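To make the reasoning concrete, here is a self-contained C++ sketch
(invented struct, not the LLVM API) of the check the patch performs with
the minimum element counts:

#include <cassert>
#include <cstdint>

struct VecTypeSketch { uint64_t MinElts; bool Scalable; };

// After the source vector has been split into two halves, decide whether an
// EXTRACT_SUBVECTOR of SubVT at index IdxVal lies entirely in the low half.
// MinElts is the statically known minimum count; for scalable vectors both it
// and the index are implicitly scaled by VSCALE, so the comparison holds for
// fixed and scalable types alike.
bool extractFromLowHalf(VecTypeSketch SrcVT, VecTypeSketch SubVT, uint64_t IdxVal) {
  assert(SrcVT.Scalable == SubVT.Scalable &&
         "extracting a fixed-length vector from a scalable one is not handled");
  uint64_t LoElts = SrcVT.MinElts / 2; // minimum element count of the low half
  if (IdxVal < LoElts) {
    assert(IdxVal + SubVT.MinElts <= LoElts &&
           "Extracted subvector crosses vector split!");
    return true;
  }
  return false;
}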
Fixes warnings in this test:
sve-sext-zext.ll
Differential revision: https://reviews.llvm.org/D83198
(cherry picked from commit 5d84eafc6b86a42e261af8d753c3a823e0e7c67e)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Removed:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1394f084c6dc..6963de2e5029 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2197,13 +2197,19 @@ SDValue
DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
+
+ if (SubVT.isScalableVector() !=
+ N->getOperand(0).getValueType().isScalableVector())
+report_fatal_error("Extracting a fixed-length vector from an illegal "
+ "scalable vector is not yet supported");
+
GetSplitVector(N->getOperand(0), Lo, Hi);
- uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
uint64_t IdxVal = cast(Idx)->getZExtValue();
if (IdxVal < LoElts) {
-assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts &&
"Extracted subvector crosses vector split!");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
[llvm-branch-commits] [llvm] db30641 - [NFC][AArch64] Replace some template methods/invocations...
Author: Francesco Petrogalli
Date: 2020-07-31T17:27:52+02:00
New Revision: db306412bf65f4b6fa4314dd5611752448bbc80c
URL:
https://github.com/llvm/llvm-project/commit/db306412bf65f4b6fa4314dd5611752448bbc80c
DIFF:
https://github.com/llvm/llvm-project/commit/db306412bf65f4b6fa4314dd5611752448bbc80c.diff
LOG: [NFC][AArch64] Replace some template methods/invocations...
...with the non-template version, as the template version might
increase the size of the compiler build.
Methods affected:
1.`findAddrModeSVELoadStore`
2. `SelectPredicatedStore`
Also, remove the `const` qualifier from the `unsigned` parameters of
the methods to conform with other similar methods in the class.
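A minimal standalone C++ sketch of the change (written for this summary,
not the selector code itself) shows why the non-template form is smaller:
the template version stamps out one copy per Scale value, while the plain
version exists exactly once:

#include <cstdio>

// Before: one instantiation per Scale value ends up in the binary.
template <unsigned Scale>
void selectPredicatedStoreTpl(unsigned Opc_rr, unsigned Opc_ri) {
  std::printf("scale=%u rr=%u ri=%u\n", Scale, Opc_rr, Opc_ri);
}

// After: the scale is an ordinary runtime argument, so there is one copy.
void selectPredicatedStore(unsigned Scale, unsigned Opc_rr, unsigned Opc_ri) {
  std::printf("scale=%u rr=%u ri=%u\n", Scale, Opc_rr, Opc_ri);
}

int main() {
  selectPredicatedStoreTpl<1>(10, 11); // instantiates the Scale == 1 copy
  selectPredicatedStoreTpl<2>(20, 21); // instantiates a second, separate copy
  selectPredicatedStore(1, 10, 11);    // same symbol for every scale value
  selectPredicatedStore(2, 20, 21);
  return 0;
}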
(cherry picked from commit dbeb184b7f54db2d3ef20ac153b1c77f81cf0b99)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 10c477853353..dbd7db7ee8e6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -262,14 +262,12 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- template
- void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned
Opc_rr,
- const unsigned Opc_ri);
- template
+ void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
+ unsigned Opc_rr, unsigned Opc_ri);
std::tuple
- findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
- const unsigned Opc_ri, const SDValue &OldBase,
- const SDValue &OldOffset);
+ findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
+ const SDValue &OldBase, const SDValue &OldOffset,
+ unsigned Scale);
bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -1414,12 +1412,12 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N,
unsigned NumVecs,
/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
-template
std::tuple
-AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
- const unsigned Opc_ri,
+AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
+ unsigned Opc_ri,
const SDValue &OldBase,
- const SDValue &OldOffset) {
+ const SDValue &OldOffset,
+ unsigned Scale) {
SDValue NewBase = OldBase;
SDValue NewOffset = OldOffset;
// Detect a possible Reg+Imm addressing mode.
@@ -1429,7 +1427,7 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N,
const unsigned Opc_rr,
// Detect a possible reg+reg addressing mode, but only if we haven't already
// detected a Reg+Imm one.
const bool IsRegReg =
- !IsRegImm && SelectSVERegRegAddrMode(OldBase, NewBase, NewOffset);
+ !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
// Select the instruction.
return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
@@ -1479,10 +1477,9 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N,
unsigned NumVecs,
ReplaceNode(N, St);
}
-template
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
-const unsigned Opc_rr,
-const unsigned Opc_ri) {
+unsigned Scale, unsigned
Opc_rr,
+unsigned Opc_ri) {
SDLoc dl(N);
// Form a REG_SEQUENCE to force register allocation.
@@ -1492,9 +1489,9 @@ void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode
*N, unsigned NumVecs,
// Optimize addressing mode.
unsigned Opc;
SDValue Offset, Base;
- std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
+ std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
- CurDAG->getTargetConstant(0, dl, MVT::i64));
+ CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
Base, // address
@@ -4085,63 +4082,51 @@ v
[llvm-branch-commits] [llvm] a3532c5 - [SVE] Don't use LocalStackAllocation for SVE objects
Author: David Sherwood
Date: 2020-07-31T17:27:49+02:00
New Revision: a3532c58be5c3a4107549c2462613be76507fe55
URL:
https://github.com/llvm/llvm-project/commit/a3532c58be5c3a4107549c2462613be76507fe55
DIFF:
https://github.com/llvm/llvm-project/commit/a3532c58be5c3a4107549c2462613be76507fe55.diff
LOG: [SVE] Don't use LocalStackAllocation for SVE objects
I have introduced a new TargetFrameLowering query function,
isStackIdSafeForLocalArea, that reports whether it is safe for objects of
a given stack id to be bundled into the local area. The default behaviour
is to always bundle regardless of the stack id; for AArch64 this is
overridden so that only fixed-size stack objects are considered safe.
There is future work here to extend this algorithm for multiple local
areas so that SVE stack objects can be bundled together and accessed
from their own virtual base-pointer.
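A minimal standalone C++ sketch of the hook and its AArch64 override
(simplified class shapes and an invented stack-id value; the real code is
in the diff below):

#include <cstdio>

enum StackIdSketch : unsigned { DefaultId = 0, SVEVectorId = 1 };

struct TargetFrameLoweringSketch {
  virtual ~TargetFrameLoweringSketch() = default;
  // By default, any stack id may be bundled into the pre-allocated local area.
  virtual bool isStackIdSafeForLocalArea(unsigned) const { return true; }
};

struct AArch64FrameLoweringSketch : TargetFrameLoweringSketch {
  bool isStackIdSafeForLocalArea(unsigned StackId) const override {
    // SVE objects have scalable sizes, so they cannot share the fixed-size
    // local block; keep them out of it for now.
    return StackId != SVEVectorId;
  }
};

int main() {
  AArch64FrameLoweringSketch TFI;
  std::printf("default object bundled: %d\n", TFI.isStackIdSafeForLocalArea(DefaultId));
  std::printf("SVE object bundled:     %d\n", TFI.isStackIdSafeForLocalArea(SVEVectorId));
  return 0;
}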
Differential Revision: https://reviews.llvm.org/D83859
(cherry picked from commit 14bc85e0ebb6c00c1672158ab6a692bfbb11e1cc)
Added:
llvm/test/CodeGen/AArch64/sve-localstackalloc.mir
Modified:
llvm/include/llvm/CodeGen/TargetFrameLowering.h
llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
Removed:
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index c3a11b199675..d6580430daf7 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -134,6 +134,12 @@ class TargetFrameLowering {
/// was called).
virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;
+ /// This method returns whether or not it is safe for an object with the
+ /// given stack id to be bundled into the local area.
+ virtual bool isStackIdSafeForLocalArea(unsigned StackId) const {
+return true;
+ }
+
/// getOffsetOfLocalArea - This method returns the offset of the local area
/// from the stack pointer on entrance to a function.
///
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 6c5ef0255a08..204fb556d810 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -220,6 +220,8 @@ void
LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (StackProtectorFI == (int)i)
continue;
+ if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
+continue;
switch (MFI.getObjectSSPLayout(i)) {
case MachineFrameInfo::SSPLK_None:
@@ -254,6 +256,8 @@ void
LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (ProtectedObjs.count(i))
continue;
+if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
+ continue;
AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 9d0a6d9eaf25..444740cb50ab 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -105,6 +105,12 @@ class AArch64FrameLowering : public TargetFrameLowering {
}
}
+ bool isStackIdSafeForLocalArea(unsigned StackId) const override {
+// We don't support putting SVE objects into the pre-allocated local
+// frame block at the moment.
+return StackId != TargetStackID::SVEVector;
+ }
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
uint64_t StackBumpBytes) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir
b/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir
new file mode 100644
index ..c20846c54b6a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir
@@ -0,0 +1,61 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=localstackalloc
-o - %s | FileCheck %s
+
+--- |
+ ; ModuleID = ''
+ source_filename = ""
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ define @insert_32i8_idx( %a, i8 %elt,
i64 %idx) #0 {
+%ins = insertelement %a, i8 %elt, i64 %idx
+ret %ins
+ }
+
+ attributes #0 = { "target-features"="+sve" }
+
+...
+---
+name:insert_32i8_idx
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: zpr, preferred-register: '' }
+ - { id: 1, class: zpr, preferred-register: '' }
+ - { id: 2, class: gpr32, preferred-register: '' }
+ - { id: 3, class: gpr64, preferred-register: '' }
+ - { id: 5, class: ppr_3b, preferred-register: '' }
+ - { id: 6, class: gpr64sp, preferred-register: '' }
+ - { id: 7, class: zpr, preferred-register: '' }
+ - { id: 8, class: zpr, preferred-register: '' }
+
[llvm-branch-commits] [llvm] 6b66be5 - [llvm][sve] Reg + Imm addressing mode for ld1ro.
Author: Francesco Petrogalli
Date: 2020-07-31T17:27:52+02:00
New Revision: 6b66be512110acb2dfdab00d9755d86e185f1e3d
URL:
https://github.com/llvm/llvm-project/commit/6b66be512110acb2dfdab00d9755d86e185f1e3d
DIFF:
https://github.com/llvm/llvm-project/commit/6b66be512110acb2dfdab00d9755d86e185f1e3d.diff
LOG: [llvm][sve] Reg + Imm addressing mode for ld1ro.
Reviewers: kmclaughlin, efriedma, sdesmalen
Subscribers: tschuett, hiraditya, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83357
(cherry picked from commit 809600d6642773f71245f76995dab355effc73af)
Added:
llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2c992c07fad9..1500da2fdfc7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12301,6 +12301,9 @@ static SDValue performLD1ReplicateCombine(SDNode *N,
SelectionDAG &DAG) {
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
+ if (VT == MVT::nxv8bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6df7970f4d82..4f4ba692c2db 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -495,6 +495,9 @@ def SImmS4XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 16, SDLoc(N), MVT::i64);
}]>;
+def SImmS32XForm : SDNodeXFormgetTargetConstant(N->getSExtValue() / 32, SDLoc(N), MVT::i64);
+}]>;
// simm6sN predicate - True if the immediate is a multiple of N in the range
// [-32 * N, 31 * N].
@@ -546,7 +549,7 @@ def simm4s16 : Operand, ImmLeaf, ImmLeaf=-256 && Imm <= 224 && (Imm % 32) == 0x0; }]> {
+[{ return Imm >=-256 && Imm <= 224 && (Imm % 32) == 0x0; }], SImmS32XForm> {
let PrintMethod = "printImmScale<32>";
let ParserMatchClass = SImm4s32Operand;
let DecoderMethod = "DecodeSImm<4>";
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td
b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a005d1e65abe..c56a65b9e212 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7718,9 +7718,13 @@ multiclass sve_mem_ldor_si sz, string asm,
RegisterOperand listty,
(!cast(NAME) zprty:$Zt, PPR3bAny:$Pg,
GPR64sp:$Rn, simm4s32:$imm4), 0>;
// Base addressing mode
- def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), GPR64sp:$base)),
-(!cast(NAME) PPR3bAny:$gp, GPR64sp:$base, (i64 0))>;
-
+ def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), GPR64sp:$base)),
+(!cast(NAME) PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
+ let AddedComplexity = 2 in {
+// Reg + Imm addressing mode
+def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), (add GPR64:$base, (i64
simm4s32:$imm,
+ (!cast(NAME) $Pg, $base, simm4s32:$imm)>;
+ }
}
class sve_mem_ldor_ss sz, string asm, RegisterOperand VecList,
diff --git
a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
new file mode 100644
index ..e7edfc9d6bdd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
@@ -0,0 +1,174 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s
2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
+
+;
+; LD1ROB
+;
+
+define @ld1rob_i8( %pg, i8* %a) nounwind {
+; CHECK-LABEL: ld1rob_i8:
+; CHECK-NEXT: ld1rob { z0.b }, p0/z, [x0, #32]
+; CHECK-NEXT: ret
+ %base = getelementptr i8, i8* %a, i64 32
+ %load = call @llvm.aarch64.sve.ld1ro.nxv16i8( %pg, i8* %base)
+ ret %load
+}
+
+;
+; LD1ROH
+;
+
+define @ld1roh_i16( %pg, i16* %a) nounwind
{
+; CHECK-LABEL: ld1roh_i16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT: ret
+ %base = getelementptr i16, i16* %a, i64 32
+ %load = call @llvm.aarch64.sve.ld1ro.nxv8i16( %pg, i16* %base)
+ ret %load
+}
+
+define @ld1roh_f16( %pg, half* %a)
nounwind {
+; CHECK-LABEL: ld1roh_f16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT: ret
+ %base = getelementptr half, half* %a, i64 32
+ %load = call @llvm.aarch64.sve.ld1ro.nxv8f16( %pg, half* %base)
+ ret %load
+}
+
+define @ld1roh_bf16( %pg, bfloat* %a)
nounwind #0 {
+; CHECK-LABEL: ld1roh_bf16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT: ret
+ %base = getel
[llvm-branch-commits] [llvm] 328269f - [AArch64][SVE] Fix PCS for functions taking/returning scalable types.
Author: Sander de Smalen
Date: 2020-07-31T17:27:46+02:00
New Revision: 328269f3834d793bd4a7287d4344aa266d6641b9
URL:
https://github.com/llvm/llvm-project/commit/328269f3834d793bd4a7287d4344aa266d6641b9
DIFF:
https://github.com/llvm/llvm-project/commit/328269f3834d793bd4a7287d4344aa266d6641b9.diff
LOG: [AArch64][SVE] Fix PCS for functions taking/returning scalable types.
The default calling convention needs to save/restore the SVE callee
saves according to the SVE PCS when the function takes or returns
scalable types, even when the `aarch64_sve_vector_pcs` CC is not
specified for the function.
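The rule can be summarised with a short standalone C++ sketch (invented
struct and enum names; the real check is the hasSVEArgsOrReturn helper in
the diff below):

#include <algorithm>
#include <vector>

struct TypeSketch { bool IsScalableVector; };
struct FunctionSketch { TypeSketch Ret; std::vector<TypeSketch> Args; };

enum class CalleeSavedList { AAPCS, SVE_AAPCS };

// Pick the SVE callee-saved register list (CSR_AArch64_SVE_AAPCS in the
// patch) whenever any argument or the return value is a scalable vector,
// even though the function uses the default C calling convention.
CalleeSavedList pickCalleeSavedList(const FunctionSketch &F) {
  bool HasSVEArgsOrReturn =
      F.Ret.IsScalableVector ||
      std::any_of(F.Args.begin(), F.Args.end(),
                  [](const TypeSketch &T) { return T.IsScalableVector; });
  return HasSVEArgsOrReturn ? CalleeSavedList::SVE_AAPCS : CalleeSavedList::AAPCS;
}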
Reviewers: efriedma, paulwalker-arm, david-arm, rengolin
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D84041
(cherry picked from commit 9bacf1588583014538a0217add18f370acb95788)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/test/CodeGen/AArch64/sve-calling-convention.ll
llvm/test/CodeGen/AArch64/sve-trunc.ll
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 886158ca4490..de1ae4759210 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -40,6 +40,14 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
AArch64_MC::initLLVMToCVRegMapping(this);
}
+static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
+ const Function &F = MF->getFunction();
+ return isa<ScalableVectorType>(F.getReturnType()) ||
+ any_of(F.args(), [](const Argument &Arg) {
+ return isa<ScalableVectorType>(Arg.getType());
+ });
+}
+
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
@@ -75,6 +83,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction
*MF) const {
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
+ if (hasSVEArgsOrReturn(MF))
+return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
b/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
index 767a3cd8acfe..f95e749ad5ee 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel <
%s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=prologepilog <
%s 2>%t | FileCheck %s --check-prefix=CHECKCSR
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions
on how to resolve it.
@@ -123,3 +124,25 @@ define
@sve_signature_pred_caller( %arg1, @sve_signature_pred( %arg2,
%arg1)
ret %res
}
+
+; Test that functions returning or taking SVE arguments use the correct
+; callee-saved set when using the default C calling convention (as opposed
+; to aarch64_sve_vector_pcs)
+
+; CHECKCSR-LABEL: name: sve_signature_vec_ret_callee
+; CHECKCSR: callee-saved-register: '$z8'
+; CHECKCSR: callee-saved-register: '$p4'
+; CHECKCSR: RET_ReallyLR
+define @sve_signature_vec_ret_callee() nounwind {
+ call void asm sideeffect "nop", "~{z8},~{p4}"()
+ ret zeroinitializer
+}
+
+; CHECKCSR-LABEL: name: sve_signature_vec_arg_callee
+; CHECKCSR: callee-saved-register: '$z8'
+; CHECKCSR: callee-saved-register: '$p4'
+; CHECKCSR: RET_ReallyLR
+define void @sve_signature_vec_arg_callee( %v) nounwind {
+ call void asm sideeffect "nop", "~{z8},~{p4}"()
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll
b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index 3743301cfa9b..46d152bbf7ac 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -113,6 +113,12 @@ entry:
define @trunc_i64toi1_split3( %in) {
; CHECK-LABEL: trunc_i64toi1_split3:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT:str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:addvl sp, sp, #-1
+; CHECK-NEXT:str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 16
+; CHECK-NEXT:.cfi_offset p4, -16
+; CHECK-NEXT:.cfi_offset w29, -16
; CHECK-NEXT:ptrue p0.d
; CHECK-NEXT:and z7.d, z7.d, #0x1
; CHECK-NEXT:and z6.d, z6.d, #0x1
@@ -134,9 +140,12 @@ define @trunc_i64toi1_split3( %in) {
; CHECK-NEXT:cmpne p4.d, p0/z, z1.d, #0
; CHECK-NEXT:cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:uzp1 p0.s, p0.s, p4.s
+; CHECK-NEXT:ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:uzp1 p1.h, p3.h, p1.h
; CHECK-NEXT:uzp1 p0.h, p0.h, p2.h
; CHECK-NEXT:uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:addvl sp, sp, #1
+; CHECK-NEXT:ldr x29
[llvm-branch-commits] [llvm] 13fb3d3 - [AArch64][SVE] Fix epilogue for SVE when the stack is realigned.
Author: Sander de Smalen
Date: 2020-07-31T17:27:48+02:00
New Revision: 13fb3d3007f0c2ddfc14a1819def5bec1533a0b0
URL:
https://github.com/llvm/llvm-project/commit/13fb3d3007f0c2ddfc14a1819def5bec1533a0b0
DIFF:
https://github.com/llvm/llvm-project/commit/13fb3d3007f0c2ddfc14a1819def5bec1533a0b0.diff
LOG: [AArch64][SVE] Fix epilogue for SVE when the stack is realigned.
While deallocating the stack frame, the offset used to reload the
callee-saved registers was not pointing to the SVE callee-saves,
but rather to the whole SVE area.

   +--------------+
   | GPR callee   |
   |    saves     |
   +--------------+ <- FP
   | SVE callee   |
   |    saves     |
   +--------------+ <- Should restore SVE callee saves from here
   | SVE Spills   |
   |  and Locals  |
   +--------------+ <- instead of from here.
   |              |
   :              :
   |              |
   +--------------+ <- SP
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D84539
(cherry picked from commit cda2eb3ad2bbe923e74d6eb083af196a0622d800)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/test/CodeGen/AArch64/framelayout-sve.mir
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 1b49c692f293..4789a9f02937 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1596,12 +1596,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction
&MF,
// Deallocate the SVE area.
if (SVEStackSize) {
if (AFI->isStackRealigned()) {
- if (AFI->getSVECalleeSavedStackSize())
-// Set SP to start of SVE area, from which the callee-save reloads
-// can be done. The code below will deallocate the stack space
+ if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
+// Set SP to start of SVE callee-save area from which they can
+// be reloaded. The code below will deallocate the stack space
// space by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
--SVEStackSize, TII, MachineInstr::FrameDestroy);
+{-CalleeSavedSize, MVT::nxv1i8}, TII,
+MachineInstr::FrameDestroy);
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 75a65a6ad522..668b243dd79e 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -500,7 +500,7 @@ body: |
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
-# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -19
+# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
[llvm-branch-commits] [llvm] 16a68ab - [SVE] Don't consider scalable vector types in SLPVectorizerPass::vectorizeChainsInBlock
Author: David Sherwood
Date: 2020-07-31T17:27:50+02:00
New Revision: 16a68abcebc0c09edaa03bde11e0f452cfd4abdf
URL:
https://github.com/llvm/llvm-project/commit/16a68abcebc0c09edaa03bde11e0f452cfd4abdf
DIFF:
https://github.com/llvm/llvm-project/commit/16a68abcebc0c09edaa03bde11e0f452cfd4abdf.diff
LOG: [SVE] Don't consider scalable vector types in
SLPVectorizerPass::vectorizeChainsInBlock
In vectorizeChainsInBlock we try to collect chains of PHI nodes
that have the same element type, but the code is relying upon
the implicit conversion from TypeSize -> uint64_t. For now, I have
modified the code to ignore PHI nodes with scalable types.
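To make the failure mode concrete, here is a small standalone C++ sketch
of the issue (a simplified stand-in for llvm::TypeSize, not the real
class):

#include <cstdint>

struct TypeSizeSketch {
  uint64_t MinSize; // in bits
  bool Scalable;    // true means the real size is MinSize * vscale
  bool isScalable() const { return Scalable; }
  uint64_t getFixedSize() const { return MinSize; } // the real class asserts !Scalable here
};

// Element size used when grouping PHIs: returning 0 marks "skip this PHI",
// which is what the patch effectively does for scalable element types instead
// of letting an implicit integer conversion silently drop the vscale factor.
uint64_t eltSizeInBitsOrSkip(TypeSizeSketch TS) {
  if (TS.isScalable())
    return 0;
  return TS.getFixedSize();
}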
Differential Revision: https://reviews.llvm.org/D83542
(cherry picked from commit 9ad7c980bb47edd7db8f8db828b487cc7dfc9921)
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
Removed:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5bc35aa4695f..f950d0d4eb2b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7397,8 +7397,17 @@ bool
SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Look for the next elements with the same type.
SmallVector::iterator SameTypeIt = IncIt;
Type *EltTy = (*IncIt)->getType();
- unsigned EltSize = EltTy->isSized() ? DL->getTypeSizeInBits(EltTy)
- : MaxVecRegSize;
+
+ assert(EltTy->isSized() &&
+ "Instructions should all be sized at this point");
+ TypeSize EltTS = DL->getTypeSizeInBits(EltTy);
+ if (EltTS.isScalable()) {
+// For now, just ignore vectorizing scalable types.
+++IncIt;
+continue;
+ }
+
+ unsigned EltSize = EltTS.getFixedSize();
unsigned MaxNumElts = MaxVecRegSize / EltSize;
if (MaxNumElts < 2) {
++IncIt;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
index 70ce0dc4d7ba..99c60912f9db 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -slp-vectorizer -S 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
@@ -21,5 +24,28 @@ define void @test() {
ret void
}
+define @scalable_phi( %a, i32 %b) {
+; CHECK-LABEL: @scalable_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT:br i1 [[CMP]], label [[IF_THEN:%.*]], label [[END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT:br label [[END]]
+; CHECK: end:
+; CHECK-NEXT:[[RETVAL:%.*]] = phi [ [[A:%.*]],
[[ENTRY:%.*]] ], [ zeroinitializer, [[IF_THEN]] ]
+; CHECK-NEXT:ret [[RETVAL]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %if.then, label %end
+
+if.then:
+ br label %end
+
+end:
+ %retval = phi [ %a, %entry ], [ zeroinitializer, %if.then
]
+ ret %retval
+}
+
declare @llvm.masked.load.nxv16i8.p0nxv16i8(*, i32 immarg, , )
declare void @llvm.masked.store.nxv16i8.p0nxv16i8(, *, i32 immarg, )
[llvm-branch-commits] [llvm] 48eb1aa - [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.
Author: Eli Friedman
Date: 2020-07-31T17:27:47+02:00
New Revision: 48eb1aa387eb1d356632b82efaf6438d1fcb6640
URL:
https://github.com/llvm/llvm-project/commit/48eb1aa387eb1d356632b82efaf6438d1fcb6640
DIFF:
https://github.com/llvm/llvm-project/commit/48eb1aa387eb1d356632b82efaf6438d1fcb6640.diff
LOG: [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.
It's sort of tricky to hit this in practice, but not impossible. I have
a synthetic C testcase if anyone is interested.
The implementation is identical to the equivalent NEON register copies.
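A standalone C++ sketch of the expansion order (illustrative only; the
register numbers stand in for the ZPR sub-registers, and the real code
goes through copyPhysRegTuple as shown in the diff below):

#include <cstdio>

// Expand a Z-register tuple copy into per-register ORR_ZZZ copies. When the
// source and destination tuples overlap (e.g. z0_z1 -> z1_z2), the order
// matters so that no source register is clobbered before it has been read:
// copy upwards when the destination starts below the source, downwards
// otherwise.
void copyZTuple(unsigned DestBase, unsigned SrcBase, unsigned NumRegs) {
  bool LowToHigh = DestBase < SrcBase;
  for (unsigned I = 0; I != NumRegs; ++I) {
    unsigned Idx = LowToHigh ? I : NumRegs - 1 - I;
    std::printf("$z%u = ORR_ZZZ $z%u, $z%u\n",
                DestBase + Idx, SrcBase + Idx, SrcBase + Idx);
  }
}

int main() {
  copyZTuple(1, 0, 2); // $z1_z2 = COPY $z0_z1: emits z2 = z1, then z1 = z0
  copyZTuple(0, 1, 2); // $z0_z1 = COPY $z1_z2: emits z0 = z1, then z1 = z2
  return 0;
}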
Differential Revision: https://reviews.llvm.org/D84373
(cherry picked from commit 993c1a3219a8ae69f1d700183bf174d75f3815d4)
Added:
llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5139ae5ccaf1..08f80c9aa361 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2744,6 +2744,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock
&MBB,
return;
}
+ // Copy a Z register pair by copying the individual sub-registers.
+ if (AArch64::ZPR2RegClass.contains(DestReg) &&
+ AArch64::ZPR2RegClass.contains(SrcReg)) {
+static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+return;
+ }
+
+ // Copy a Z register triple by copying the individual sub-registers.
+ if (AArch64::ZPR3RegClass.contains(DestReg) &&
+ AArch64::ZPR3RegClass.contains(SrcReg)) {
+static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2};
+copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+return;
+ }
+
+ // Copy a Z register quad by copying the individual sub-registers.
+ if (AArch64::ZPR4RegClass.contains(DestReg) &&
+ AArch64::ZPR4RegClass.contains(SrcReg)) {
+static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2, AArch64::zsub3};
+copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+return;
+ }
+
if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
diff --git a/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
new file mode 100644
index ..83a0b5dd1c14
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=postrapseudos
-simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:copy_zpr2
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$z0_z1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+liveins: $z0_z1
+; CHECK-LABEL: name: copy_zpr2
+; CHECK: liveins: $z0_z1
+; CHECK: $z2 = ORR_ZZZ $z1, $z1
+; CHECK: $z1 = ORR_ZZZ $z0, $z0
+; CHECK: $z0 = ORR_ZZZ $z1, $z1
+; CHECK: $z1 = ORR_ZZZ $z2, $z2
+; CHECK: RET_ReallyLR
+$z1_z2 = COPY $z0_z1
+$z0_z1 = COPY $z1_z2
+RET_ReallyLR
+
+...
+---
+name:copy_zpr3
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$z0_z1_z2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+liveins: $z0_z1_z2
+; CHECK-LABEL: name: copy_zpr3
+; CHECK: liveins: $z0_z1_z2
+; CHECK: $z3 = ORR_ZZZ $z2, $z2
+; CHECK: $z2 = ORR_ZZZ $z1, $z1
+; CHECK: $z1 = ORR_ZZZ $z0, $z0
+; CHECK: $z0 = ORR_ZZZ $z1, $z1
+; CHECK: $z1 = ORR_ZZZ $z2, $z2
+; CHECK: $z2 = ORR_ZZZ $z3, $z3
+; CHECK: RET_ReallyLR
+$z1_z2_z3 = COPY $z0_z1_z2
+$z0_z1_z2 = COPY $z1_z2_z3
+RET_ReallyLR
+
+...
+---
+name:copy_zpr4
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$z0_z1_z2_z3' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+liveins: $z0_z1_z2_z3
+; CHECK-LABEL: name: copy_zpr4
+; CHECK: liveins: $z0_z1_z2_z3
+; CHECK: $z4 = ORR_ZZZ $z3, $z3
+; CHECK: $z3 = ORR_ZZZ $z2, $z2
+; CHECK: $z2 = ORR_ZZZ $z1, $z1
+; CHECK: $z1 = ORR_ZZZ $z0, $z0
+; CHECK: $z0 = ORR_ZZZ $z1, $z1
+; CHECK: $z1 = ORR_ZZZ $z2, $z2
+; CHECK: $z2 = ORR_ZZZ $z3, $z3
+; CHECK: $z3 = ORR_ZZZ $z4, $z4
+; CHECK: RET_ReallyLR
+$z1_z2_z3_z4 = COPY $z0_z1_z2_z3
+$z0_z1_z2_z3 = COPY $z1_z2_z3_z4
+RET_ReallyLR
+
+...
[llvm-branch-commits] [llvm] 967b84c - [AArch64][SVE] Don't support fixedStack for SVE objects.
Author: Sander de Smalen
Date: 2020-07-31T17:27:47+02:00
New Revision: 967b84c7a7e2a39d01ab4266bf5eac8c2de98ce5
URL:
https://github.com/llvm/llvm-project/commit/967b84c7a7e2a39d01ab4266bf5eac8c2de98ce5
DIFF:
https://github.com/llvm/llvm-project/commit/967b84c7a7e2a39d01ab4266bf5eac8c2de98ce5.diff
LOG: [AArch64][SVE] Don't support fixedStack for SVE objects.
Fixed stack objects are preallocated and defined to be allocated before
any of the regular stack objects. These are normally used to model stack
arguments.
The AAPCS does not support passing SVE registers on the stack by value
(only by reference). The current layout also doesn't place them before
all stack objects, but rather before all SVE objects. Removing this
simplifies the code that emits the allocation/deallocation
around callee-saved registers (D84042).
This patch also removes all uses of fixedStack from
framelayout-sve.mir, where it was used purely for testing purposes.
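For illustration, a small C++ fragment written for this summary (the
function is invented) shows the PCS property the patch relies on:

#include <arm_sve.h>

// Only eight scalable vector arguments travel in z0-z7. With nine, the AAPCS
// passes the ninth indirectly: the caller stores it to memory and passes a
// pointer, so the callee never receives an SVE value as a by-value fixed
// stack object.
svint32_t nine_sve_args(svint32_t v0, svint32_t v1, svint32_t v2, svint32_t v3,
                        svint32_t v4, svint32_t v5, svint32_t v6, svint32_t v7,
                        svint32_t v8) {
  return svadd_s32_x(svptrue_b32(), v0, v8);
}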
Reviewers: paulwalker-arm, efriedma, rengolin
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D84538
(cherry picked from commit 54492a5843a34684ce21ae201dd8ca3e509288fd)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/test/CodeGen/AArch64/framelayout-sve.mir
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index efa3fd5ca9ce..cc563dd70632 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2595,20 +2595,21 @@ static int64_t
determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int &MinCSFrameIndex,
int &MaxCSFrameIndex,
bool AssignOffsets) {
+#ifndef NDEBUG
// First process all fixed stack objects.
- int64_t Offset = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
-if (MFI.getStackID(I) == TargetStackID::SVEVector) {
- int64_t FixedOffset = -MFI.getObjectOffset(I);
- if (FixedOffset > Offset)
-Offset = FixedOffset;
-}
+assert(MFI.getStackID(I) != TargetStackID::SVEVector &&
+ "SVE vectors should never be passed on the stack by value, only by "
+ "reference.");
+#endif
auto Assign = [&MFI](int FI, int64_t Offset) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
MFI.setObjectOffset(FI, Offset);
};
+ int64_t Offset = 0;
+
// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
// Make sure to align the last callee save slot.
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 7903df64863b..575c839fbd15 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -41,10 +41,10 @@
# +--+
# |scratchreg| // x29 is used as scratch reg.
# +--+
-# | %fixed- | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
-# | stack.0 | // to be materialized with 2*ADDVL (<=> 2 * n * 16bytes)
+# | %stack.0 | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
+# | | // to be materialized with 2*ADDVL (<=> 2 * n * 16bytes)
# +--+
-# | %stack.0 | // not scalable
+# | %stack.1 | // not scalable
# +--+ <- SP
# CHECK-LABEL: name: test_allocate_sve
@@ -60,10 +60,9 @@
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
# CHECK-NEXT: RET_ReallyLR
name:test_allocate_sve
-fixedStack:
- - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
stack:
- - { id: 0, stack-id: default, size: 16, alignment: 8 }
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2 }
+ - { id: 1, stack-id: default, size: 16, alignment: 8 }
body: |
bb.0.entry:
RET_ReallyLR
@@ -73,10 +72,9 @@ body: |
# | x20, x21 | // callee saves
# |scratchreg| // x29 is used as scratch reg.
# +--+
-# | %fixed- | // scalable objects
-# | stack.0 |
+# | %stack.0 | // scalable objects
# +--+
-# | %stack.0 | // not scalable
+# | %stack.1 | // not scalable
# +--+ <- SP
# CHECK-LABEL: name: test_allocate_sve_gpr_callee_saves
@@ -95,10 +93,9 @@ body: |
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32
# CHECK-NEXT: RET_ReallyLR
name:test_allocate_sve_gpr_callee_saves
-fixedStack:
- - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
stack:
- - { id: 0, stack-id: default, size: 16, alignment: 8 }
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2 }
+ - { id: 1, stack-id: default, size: 16, alignment: 8 }
body: |
bb.0.entry:
$x20 = IMPLICI
[llvm-branch-commits] [llvm] 5583444 - [SVE][CodeGen] At -O0 fallback to DAG ISel when translating alloca with scalable types
Author: David Sherwood
Date: 2020-07-31T17:27:51+02:00
New Revision: 5583444d188015fbcf97d16c946b2617af81698a
URL:
https://github.com/llvm/llvm-project/commit/5583444d188015fbcf97d16c946b2617af81698a
DIFF:
https://github.com/llvm/llvm-project/commit/5583444d188015fbcf97d16c946b2617af81698a.diff
LOG: [SVE][CodeGen] At -O0 fallback to DAG ISel when translating alloca with
scalable types
When building code at -O0 we weren't falling back to DAG ISel correctly
when encountering alloca instructions with scalable vector types. This is
because the alloca itself has no operands that are scalable. I've fixed
this by adding a check in AArch64ISelLowering::fallBackToDAGISel for
alloca instructions that allocate scalable types.
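A minimal sketch of the added predicate, written as standalone C++ with
invented struct names (the actual check is the short diff below):

struct AllocatedTypeSketch { bool Scalable; };

struct InstSketch {
  bool AnyScalableOperand;            // the pre-existing check on the operand types
  bool IsAlloca;
  AllocatedTypeSketch AllocatedType;  // only meaningful when IsAlloca is true
};

// GlobalISel falls back to SelectionDAG if any operand is scalable, and now
// also for an alloca of a scalable type, since that type never appears among
// the alloca's operands.
bool shouldFallBackToDAGISel(const InstSketch &I) {
  if (I.AnyScalableOperand)
    return true;
  if (I.IsAlloca && I.AllocatedType.Scalable)
    return true;
  return false;
}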
Differential Revision: https://reviews.llvm.org/D84746
(cherry picked from commit 23ad660b5d34930b2b5362f1bba63daee78f6aa4)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
Removed:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d9951b7b8c5b..2c992c07fad9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14922,6 +14922,11 @@ bool AArch64TargetLowering::fallBackToDAGISel(const
Instruction &Inst) const {
if (isa(Inst.getOperand(i)->getType()))
return true;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+if (isa<ScalableVectorType>(AI->getAllocatedType()))
+ return true;
+ }
+
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index cf596c98d462..ea382af14933 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -244,6 +244,14 @@ define i8 @scalable_call(i8* %addr) #1 {
ret i8 %res
}
+; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate
instruction{{.*}}scalable_alloca
+; FALLBACK-WITH-REPORT-OUT-LABEL: scalable_alloca
+define void @scalable_alloca() #1 {
+ %local0 = alloca
+ load volatile , * %local0
+ ret void
+}
+
; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate
instruction{{.*}}asm_indirect_output
; FALLBACK-WITH-REPORT-OUT-LABEL: asm_indirect_output
define void @asm_indirect_output() {
[llvm-branch-commits] [llvm] bec3064 - [SVE][CodeGen] Add simple integer add tests for SVE tuple types
Author: David Sherwood
Date: 2020-07-31T17:27:50+02:00
New Revision: bec306442de89c71c2268e7e2629b4d454895a56
URL:
https://github.com/llvm/llvm-project/commit/bec306442de89c71c2268e7e2629b4d454895a56
DIFF:
https://github.com/llvm/llvm-project/commit/bec306442de89c71c2268e7e2629b4d454895a56.diff
LOG: [SVE][CodeGen] Add simple integer add tests for SVE tuple types
I have added tests to:
CodeGen/AArch64/sve-intrinsics-int-arith.ll
for doing simple integer add operations on tuple types. Since these
tests introduced new warnings due to incorrect use of
getVectorNumElements(), I have also fixed up these warnings in the
same patch. These fixes are:
1. In narrowExtractedVectorBinOp I have changed the code to bail out
early for scalable vector types, since we've not yet hit a case that
proves the optimisations are profitable for scalable vectors.
2. In DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS I have replaced
calls to getVectorNumElements with getVectorMinNumElements in cases
that work with scalable vectors. For the other cases I have added
asserts that the vector is not scalable because we should not be
using shuffle vectors and build vectors in such cases.
Differential revision: https://reviews.llvm.org/D84016
(cherry picked from commit 207877175944656bd9b52d36f391a092854572be)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
Removed:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a026d3960026..ec384d2a7c56 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19190,7 +19190,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode
*Extract, SelectionDAG &DAG) {
// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
- if (!WideBVT.isVector())
+ // The optimisations below currently assume we are dealing with fixed length
+ // vectors. It is possible to add support for scalable vectors, but at the
+ // moment we've done no analysis to prove whether they are profitable or not.
+ if (!WideBVT.isFixedLengthVector())
return SDValue();
EVT VT = Extract->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6963de2e5029..c81d03cac81b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3610,16 +3610,15 @@ SDValue
DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
EVT InVT = N->getOperand(0).getValueType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0));
SDLoc dl(N);
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
- unsigned NumInElts = InVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
bool InputWidened = false; // Indicates we need to widen the input.
if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
-if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+unsigned NumInElts = InVT.getVectorMinNumElements();
+if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
- unsigned NumConcat = WidenVT.getVectorNumElements() /
- InVT.getVectorNumElements();
+ unsigned NumConcat = WidenNumElts / NumInElts;
SDValue UndefVal = DAG.getUNDEF(InVT);
SmallVector Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
@@ -3643,6 +3642,11 @@ SDValue
DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return GetWidenedVector(N->getOperand(0));
if (NumOperands == 2) {
+assert(!WidenVT.isScalableVector() &&
+ "Cannot use vector shuffles to widen CONCAT_VECTOR result");
+unsigned WidenNumElts = WidenVT.getVectorNumElements();
+unsigned NumInElts = InVT.getVectorNumElements();
+
// Replace concat of two operands with a shuffle.
SmallVector MaskOps(WidenNumElts, -1);
for (unsigned i = 0; i < NumInElts; ++i) {
@@ -3657,6 +3661,11 @@ SDValue
DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
}
}
+ assert(!WidenVT.isScalableVector() &&
+ "Cannot use build vectors to widen CONCAT_VECTOR result");
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
// Fall back to use extracts and build vector.
EVT EltVT = WidenVT.getVectorElementType();
SmallVector Ops(WidenNumElts);
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
b/llvm/test/CodeGen/AA
[llvm-branch-commits] [llvm] 07a7044 - [SVE] Add checks for no warnings in CodeGen/AArch64/sve-sext-zext.ll
Author: David Sherwood
Date: 2020-07-31T17:27:49+02:00
New Revision: 07a7044b805a422469041928c7c6ee55bcdda2a4
URL:
https://github.com/llvm/llvm-project/commit/07a7044b805a422469041928c7c6ee55bcdda2a4
DIFF:
https://github.com/llvm/llvm-project/commit/07a7044b805a422469041928c7c6ee55bcdda2a4.diff
LOG: [SVE] Add checks for no warnings in CodeGen/AArch64/sve-sext-zext.ll
Previous patches fixed up all the warnings in this test:
llvm/test/CodeGen/AArch64/sve-sext-zext.ll
and this change simply checks that no new warnings are added in future.
Differential revision: https://reviews.llvm.org/D83205
(cherry picked from commit f43b5c7a76ab83dcc80e6769d41d5c4b761312b1)
Added:
Modified:
llvm/test/CodeGen/AArch64/sve-sext-zext.ll
Removed:
diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
index 24cf433306bb..1275811d175e 100644
--- a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
define @sext_i1_i8( %a) {
; CHECK-LABEL: sext_i1_i8:
[llvm-branch-commits] [llvm] 4fd4ec6 - [AArch64][SVE] Add support for trunc to .
Author: Eli Friedman
Date: 2020-07-31T17:27:45+02:00
New Revision: 4fd4ec63813fd5b22d81adb6e201cb16ccf72b69
URL:
https://github.com/llvm/llvm-project/commit/4fd4ec63813fd5b22d81adb6e201cb16ccf72b69
DIFF:
https://github.com/llvm/llvm-project/commit/4fd4ec63813fd5b22d81adb6e201cb16ccf72b69.diff
LOG: [AArch64][SVE] Add support for trunc to .
This isn't a natively supported operation, so convert it to a
mask+compare.
In addition to the operation itself, fix up some surrounding stuff to
make the testcase work: we need concat_vectors on i1 vectors, we need
legalization of i1 vector truncates, and we need to fix up all the
relevant uses of getVectorNumElements().
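A scalar sketch of the mask+compare idea in standalone C++ (one lane; the
vector lowering does the same with an AND against #0x1 followed by a CMPNE
against zero, as visible in the sve-trunc.ll checks quoted earlier in this
digest):

#include <cstdint>

// Truncating an integer lane to i1 keeps only bit 0. SVE has no native
// truncate-to-predicate, so the lowering masks the lane and compares it
// against zero to produce the predicate bit.
bool truncLaneToI1(uint64_t Lane) {
  uint64_t Masked = Lane & 0x1; // "and z.d, z.d, #0x1"
  return Masked != 0;           // "cmpne p.d, p0/z, z.d, #0"
}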
Differential Revision: https://reviews.llvm.org/D83811
(cherry picked from commit b8f765a1e17f8d212ab1cd8f630d35adc7495556)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-trunc.ll
Removed:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f14b3dba4f31..a026d3960026 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11372,9 +11372,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Stop if more than one members are non-undef.
if (NumDefs > 1)
break;
+
VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
- X.getValueType().getVectorNumElements()));
+
X.getValueType().getVectorElementCount()));
}
if (NumDefs == 0)
@@ -18795,6 +18796,11 @@ static SDValue combineConcatVectorOfScalars(SDNode *N,
SelectionDAG &DAG) {
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
EVT OpVT = N->getOperand(0).getValueType();
+
+ // We currently can't generate an appropriate shuffle for a scalable vector.
+ if (VT.isScalableVector())
+return SDValue();
+
int NumElts = VT.getVectorNumElements();
int NumOpElts = OpVT.getVectorNumElements();
@@ -19055,11 +19061,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return V;
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
- // nodes often generate nop CONCAT_VECTOR nodes.
- // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
- // place the incoming vectors at the exact same location.
+ // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
+ // operands and look for a CONCAT operations that place the incoming vectors
+ // at the exact same location.
+ //
+ // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
SDValue SingleSource = SDValue();
- unsigned PartNumElem =
N->getOperand(0).getValueType().getVectorNumElements();
+ unsigned PartNumElem =
+ N->getOperand(0).getValueType().getVectorMinNumElements();
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index b2299931021c..1394f084c6dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2151,7 +2151,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
- InVT.getVectorNumElements());
+ InVT.getVectorElementCount());
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
@@ -2559,13 +2559,9 @@ SDValue
DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
- unsigned NumElements = OutVT.getVectorNumElements();
+ ElementCount NumElements = OutVT.getVectorElementCount();
bool IsFloat = OutVT.isFloatingPoint();
- // Widening should have already made sure this is a power-two vector
- // if we're trying to split it at all. assert() that's true, just in case.
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
-
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
@@ -2595,6 +2591,9 @@ SDValue
DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
GetSplitVector(InVec, InLoVec, InHiVec);
// Truncate them to 1/2 the element size.
+ //
+ // This assumes the number of elements is a
[llvm-branch-commits] [lld] 821e924 - [LLD] [Mingw] Don't export symbols from profile generate
Author: Peiyuan Song
Date: 2020-07-31T19:58:15+02:00
New Revision: 821e924f0d3e0fc9b5991a126fd094eec12bd535
URL:
https://github.com/llvm/llvm-project/commit/821e924f0d3e0fc9b5991a126fd094eec12bd535
DIFF:
https://github.com/llvm/llvm-project/commit/821e924f0d3e0fc9b5991a126fd094eec12bd535.diff
LOG: [LLD] [Mingw] Don't export symbols from profile generate
Differential Revision: https://reviews.llvm.org/D84756
(cherry picked from commit da324f9904634855a0a3549284758cb079723cdf)
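The effect of the exclusion lists touched below can be pictured with a small,
self-contained sketch; the function name and data structure are illustrative
only, not lld's actual AutoExporter interface. Profile-generate bookkeeping
symbols (counters, data, value-profile nodes) are recognised by prefix and
kept out of the auto-export set.

#include <string>
#include <vector>

// Illustrative only: prefixes taken from the diff below.
static const std::vector<std::string> kExcludedPrefixes = {
    "__profc_", "__profd_", "__profvp_"};

static bool shouldAutoExportSketch(const std::string &Sym) {
  for (const std::string &Prefix : kExcludedPrefixes)
    if (Sym.rfind(Prefix, 0) == 0) // symbol starts with an excluded prefix
      return false;                // keep profile bookkeeping private
  return true;                     // everything else stays eligible
}
// e.g. shouldAutoExportSketch("__profc_main") == false, ("main") == true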
Added:
Modified:
lld/COFF/MinGW.cpp
Removed:
diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp
index bded985f04d0..e24cdca6ee34 100644
--- a/lld/COFF/MinGW.cpp
+++ b/lld/COFF/MinGW.cpp
@@ -34,6 +34,11 @@ AutoExporter::AutoExporter() {
"libclang_rt.builtins-arm",
"libclang_rt.builtins-i386",
"libclang_rt.builtins-x86_64",
+ "libclang_rt.profile",
+ "libclang_rt.profile-aarch64",
+ "libclang_rt.profile-arm",
+ "libclang_rt.profile-i386",
+ "libclang_rt.profile-x86_64",
"libc++",
"libc++abi",
"libunwind",
@@ -57,6 +62,10 @@ AutoExporter::AutoExporter() {
"__builtin_",
// Artificial symbols such as .refptr
".",
+ // profile generate symbols
+ "__profc_",
+ "__profd_",
+ "__profvp_",
};
excludeSymbolSuffixes = {
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] df10da2 - [OpenMP] Use weak attribute in interface only for static library
Author: Joachim Protze
Date: 2020-07-31T20:10:34+02:00
New Revision: df10da2ff0bd378917665fab295f025295413271
URL:
https://github.com/llvm/llvm-project/commit/df10da2ff0bd378917665fab295f025295413271
DIFF:
https://github.com/llvm/llvm-project/commit/df10da2ff0bd378917665fab295f025295413271.diff
LOG: [OpenMP] Use weak attribute in interface only for static library
This is to address the issue reported at:
https://bugs.llvm.org/show_bug.cgi?id=46863
Since weak is meaningless for a shared library interface function, this patch
disables the attribute when the OpenMP library is built as a shared library.
ompt_start_tool is not an interface function, but an internally called function
possibly implemented by an OMPT tool.
This function needs to be weak if possible, to allow overriding ompt_start_tool
with a function implementation built into the application.
Differential Revision: https://reviews.llvm.org/D84871
(cherry picked from commit 03116a9f8c2fc98577e153083aaf9b6a701ab8f9)
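The role of the weak attribute here can be shown with a minimal, hypothetical
pair of translation units (the symbol name is invented for the sketch, and a
GCC/Clang toolchain is assumed): when the runtime is a static archive, a strong
definition in the application replaces the runtime's weak default at link time.

// runtime.cpp -- weak default shipped by the runtime.
extern "C" __attribute__((weak)) int sketch_start_tool(void) {
  return 0; // no tool attached
}

// app.cpp -- an application (or an OMPT tool linked into it) may provide a
// strong definition, which wins over the weak default when linking statically.
extern "C" int sketch_start_tool(void) {
  return 1; // tool is present
}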
Added:
Modified:
openmp/runtime/src/kmp_ftn_entry.h
openmp/runtime/src/kmp_os.h
openmp/runtime/src/ompt-specific.cpp
Removed:
diff --git a/openmp/runtime/src/kmp_ftn_entry.h
b/openmp/runtime/src/kmp_ftn_entry.h
index ab57907e088e..b4b0dea0d1af 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -939,7 +939,7 @@ void FTN_STDCALL
KMP_EXPAND_NAME(FTN_SET_DEFAULT_DEVICE)(int KMP_DEREF arg) {
// Get number of NON-HOST devices.
// libomptarget, if loaded, provides this function in api.cpp.
-int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void)
KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) {
#if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
return 0;
@@ -957,13 +957,13 @@ int FTN_STDCALL
KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) {
// This function always returns true when called on host device.
// Compiler/libomptarget should handle when it is called inside target region.
-int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void)
KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void)
KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) {
return 1; // This is the host
}
// libomptarget, if loaded, provides this function
-int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) {
#if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
return KMP_HOST_DEVICE;
@@ -1318,7 +1318,7 @@ int FTN_STDCALL
KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) {
// This function will be defined in libomptarget. When libomptarget is not
// loaded, we assume we are on the host and return KMP_HOST_DEVICE.
// Compiler/libomptarget will handle this if called inside target.
-int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return KMP_HOST_DEVICE; }
// Compiler will ensure that this is only called from host in sequential region
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index bfe7765b2a96..d1511904e94b 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -338,10 +338,16 @@ extern "C" {
#define KMP_ALIAS(alias_of) __attribute__((alias(alias_of)))
#endif
+#if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB
+#define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak))
+#else
+#define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */
+#endif
+
#if KMP_HAVE_WEAK_ATTRIBUTE
-#define KMP_WEAK_ATTRIBUTE __attribute__((weak))
+#define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak))
#else
-#define KMP_WEAK_ATTRIBUTE /* Nothing */
+#define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */
#endif
// Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME
diff --git a/openmp/runtime/src/ompt-specific.cpp
b/openmp/runtime/src/ompt-specific.cpp
index a7288f08a661..9be699110fc6 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -27,7 +27,7 @@
#define THREAD_LOCAL __thread
#endif
-#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE
+#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE_INTERNAL
//**
// macros
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 903c872 - [analyzer] Fix out-of-tree only clang build by not relying on private header
Author: Balazs Benics
Date: 2020-07-31T20:31:44+02:00
New Revision: 903c872b169dc88f434cf84c0aee32e429e1cc56
URL:
https://github.com/llvm/llvm-project/commit/903c872b169dc88f434cf84c0aee32e429e1cc56
DIFF:
https://github.com/llvm/llvm-project/commit/903c872b169dc88f434cf84c0aee32e429e1cc56.diff
LOG: [analyzer] Fix out-of-tree only clang build by not relying on private
header
It turned out that D78704 included a private LLVM header, which is excluded
from the LLVM install target.
I'm replacing that `#include` with the public one by moving the necessary
`#define` into it. There was a discussion about this at D78704 and on the
cfe-dev mailing list.
I'm also placing a note to remind others of this pitfall.
Reviewed By: mgorny
Differential Revision: https://reviews.llvm.org/D84929
(cherry picked from commit 63d3aeb529a7b0fb95c2092ca38ad21c1f5cfd74)
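A minimal sketch of the intended usage after the move (the surrounding code is
illustrative, not taken from the unit test): out-of-tree consumers include only
the installed public header and test the macro it now carries.

// Only the public, installed header is included; llvm/Config/config.h stays
// private to the LLVM source tree.
#include "llvm/Config/llvm-config.h"

#ifdef LLVM_WITH_Z3
// Z3 is available: Z3-backed refutation paths can be exercised.
#else
// Built without Z3: skip the Z3-dependent paths.
#endif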
Added:
Modified:
clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
llvm/include/llvm/Config/config.h.cmake
llvm/include/llvm/Config/llvm-config.h.cmake
Removed:
diff --git
a/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
b/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
index 7c151c182113..e67dcacca0a9 100644
--- a/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
+++ b/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
@@ -16,7 +16,7 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Frontend/AnalysisConsumer.h"
#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#include "gtest/gtest.h"
// FIXME: Use GTEST_SKIP() instead if GTest is updated to version 1.10.0
diff --git a/llvm/include/llvm/Config/config.h.cmake
b/llvm/include/llvm/Config/config.h.cmake
index 290f74bd02d2..9a682481ccaf 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -1,6 +1,9 @@
#ifndef CONFIG_H
#define CONFIG_H
+// Include this header only under the llvm source tree.
+// This is a private header.
+
/* Exported configuration */
#include "llvm/Config/llvm-config.h"
@@ -338,9 +341,6 @@
/* Whether GlobalISel rule coverage is being collected */
#cmakedefine01 LLVM_GISEL_COV_ENABLED
-/* Define if we have z3 and want to build it */
-#cmakedefine LLVM_WITH_Z3 ${LLVM_WITH_Z3}
-
/* Define to the default GlobalISel coverage file prefix */
#cmakedefine LLVM_GISEL_COV_PREFIX "${LLVM_GISEL_COV_PREFIX}"
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake
b/llvm/include/llvm/Config/llvm-config.h.cmake
index 82b682ddb3dc..c1556e61f040 100644
--- a/llvm/include/llvm/Config/llvm-config.h.cmake
+++ b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -79,6 +79,9 @@
*/
#cmakedefine01 LLVM_FORCE_ENABLE_STATS
+/* Define if we have z3 and want to build it */
+#cmakedefine LLVM_WITH_Z3 ${LLVM_WITH_Z3}
+
/* Define if LLVM was built with a dependency to the libtensorflow dynamic
library */
#cmakedefine LLVM_HAVE_TF_API
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
