[llvm-branch-commits] [llvm] 3122c66 - [AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics
Author: Joe Ellis Date: 2021-01-13T09:44:09Z New Revision: 3122c66aee7b709046753873c4e94db73742b3de URL: https://github.com/llvm/llvm-project/commit/3122c66aee7b709046753873c4e94db73742b3de DIFF: https://github.com/llvm/llvm-project/commit/3122c66aee7b709046753873c4e94db73742b3de.diff LOG: [AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics This commit extends SVEIntrinsicOpts::optimizeConvertFromSVBool to identify and remove longer chains of redundant SVE reintepret intrinsics. For example, the following chain of redundant SVE reinterprets is now recognised as redundant: %a = %1 = @llvm.aarch64.sve.convert.to.svbool( %a) %2 = @llvm.aarch64.sve.convert.from.svbool( %1) %3 = @llvm.aarch64.sve.convert.to.svbool( %2) %4 = @llvm.aarch64.sve.convert.from.svbool( %3) %5 = @llvm.aarch64.sve.convert.to.svbool( %4) %6 = @llvm.aarch64.sve.convert.from.svbool( %5) ret %6 and will be replaced with: ret %a Eliminating these can sometimes mean emitting fewer unnecessary loads/stores when lowering to assembly. Differential Revision: https://reviews.llvm.org/D94074 Added: Modified: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll Removed: diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp index 8e8b12c07bbf..9911f33371c6 100644 --- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp +++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -177,22 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { if (isa(I->getArgOperand(0))) return processPhiNode(I); - // If we have a reinterpret intrinsic I of type A which is converting from - // another reinterpret Y of type B, and the source type of Y is A, then we can - // elide away both reinterprets if there are no other users of Y. - auto *Y = isReinterpretToSVBool(I->getArgOperand(0)); - if (!Y) -return false; + SmallVector CandidatesForRemoval; + Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr; + + const auto *IVTy = cast(I->getType()); + + // Walk the chain of conversions. + while (Cursor) { +// If the type of the cursor has fewer lanes than the final result, zeroing +// must take place, which breaks the equivalence chain. +const auto *CursorVTy = cast(Cursor->getType()); +if (CursorVTy->getElementCount().getKnownMinValue() < +IVTy->getElementCount().getKnownMinValue()) + break; + +// If the cursor has the same type as I, it is a viable replacement. +if (Cursor->getType() == IVTy) + EarliestReplacement = Cursor; - Value *SourceVal = Y->getArgOperand(0); - if (I->getType() != SourceVal->getType()) +auto *IntrinsicCursor = dyn_cast(Cursor); + +// If this is not an SVE conversion intrinsic, this is the end of the chain. +if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == + Intrinsic::aarch64_sve_convert_to_svbool || + IntrinsicCursor->getIntrinsicID() == + Intrinsic::aarch64_sve_convert_from_svbool)) + break; + +CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor); +Cursor = IntrinsicCursor->getOperand(0); + } + + // If no viable replacement in the conversion chain was found, there is + // nothing to do. 
+ if (!EarliestReplacement) return false; - I->replaceAllUsesWith(SourceVal); + I->replaceAllUsesWith(EarliestReplacement); I->eraseFromParent(); - if (Y->use_empty()) -Y->eraseFromParent(); + while (!CandidatesForRemoval.empty()) { +Instruction *Candidate = CandidatesForRemoval.pop_back_val(); +if (Candidate->use_empty()) + Candidate->eraseFromParent(); + } return true; } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll index 47e0ff8f19c7..22c61d0565af 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -67,6 +67,62 @@ define @reinterpret_test_d_rev( %a) { ret %2 } +define @reinterpret_test_full_chain( %a) { +; OPT-LABEL: @reinterpret_test_full_chain( +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) + %4 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %3) + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %4) + %6 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %5) + ret %6 +} + +; The last two reinterprets are not necessary, since they are doing the same +; work as the first two. +define @rei
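One plausible source of such chains (an assumption for illustration, not taken from the patch) is ACLE code in which every predicate is carried as an svbool_t while the underlying compare intrinsics operate on narrower predicate types, so clang wraps each call in a convert.to.svbool/convert.from.svbool pair; the function name below is hypothetical:

#include <arm_sve.h>

// Each 64-bit compare works on a <vscale x 2 x i1> predicate internally, but
// the C-level type is svbool_t (<vscale x 16 x i1>), so the result of the
// first compare is converted to svbool and immediately converted back when it
// governs the second compare, producing the redundant pairs removed above.
svbool_t redundant_roundtrip(svbool_t pg, svfloat64_t a, svfloat64_t b) {
  svbool_t gt = svcmpgt_f64(pg, a, b);
  return svcmplt_f64(gt, a, b);
}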
[llvm-branch-commits] [clang] 3d5b18a - [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments
Author: Joe Ellis Date: 2021-01-05T15:18:21Z New Revision: 3d5b18a3fdf47ae2286642131e4a92968dd01c2a URL: https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a DIFF: https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a.diff LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments VLST arguments are coerced to VLATs at the function boundary for consistency with the VLAT ABI. They are then bitcast back to VLSTs in the function prolog. Previously, this conversion is done through memory. With the introduction of the llvm.vector.{insert,extract} intrinsic, we can avoid going through memory here. Depends on D92761 Differential Revision: https://reviews.llvm.org/D92762 Added: Modified: clang/lib/CodeGen/CGCall.cpp clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp clang/test/CodeGen/attr-arm-sve-vector-bits-call.c clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c Removed: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index e28736bd3d2f..f1987408165b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2688,6 +2688,27 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, break; } + // VLST arguments are coerced to VLATs at the function boundary for + // ABI consistency. If this is a VLST that was coerced to + // a VLAT at the function boundary and the types match up, use + // llvm.experimental.vector.extract to convert back to the original + // VLST. + if (auto *VecTyTo = dyn_cast(ConvertType(Ty))) { +auto *Coerced = Fn->getArg(FirstIRArg); +if (auto *VecTyFrom = +dyn_cast(Coerced->getType())) { + if (VecTyFrom->getElementType() == VecTyTo->getElementType()) { +llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty); + +assert(NumIRArgs == 1); +Coerced->setName(Arg->getName() + ".coerce"); + ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector( +VecTyTo, Coerced, Zero, "castFixedSve"))); +break; + } +} + } + Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg), Arg->getName()); diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c index 9b733d21dbbb..9d3206a75f0e 100644 --- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c +++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c @@ -59,14 +59,14 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8))); // CHECK-LABEL: define{{.*}} void @f2( // CHECK-SAME: <[[#div(VBITS,8)]] x i8>* noalias nocapture sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, <[[#div(VBITS,8)]] x i8>* nocapture readonly %0) -// CHECK-NEXT: entry: -// CHECK-NEXT:[[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]] -// CHECK-NEXT:[[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call @llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8( undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0) -// CHECK-NEXT:[[TMP2:%.*]] = call @llvm.aarch64.sve.asrd.nxv16i8( [[TMP1]], [[CASTSCALABLESVE]], i32 1) -// CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> @llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8( [[TMP2]], i64 0) -// CHECK-NEXT:store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], <[[#div(VBITS,8)]] x 
i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]] -// CHECK-NEXT:ret void +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]] +// CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8( undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.asrd.nxv16i8( [[TMP1]], [[CASTSCALABLESVE]], i32 1) +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> @llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8( [[TMP2]], i64 0) +// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], <[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]] +// CHECK-NEXT: ret void vec_int8 f2(vec_int8 x) { return svasrd_x(svptrue_b8(), x, 1); } #endif @@ -78,24 +78,18 @@ void f3(vec1); typedef svint8_t vec2 __attrib
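A minimal source-level illustration of a coerced VLST argument (the typedef mirrors the tests above; the function name is hypothetical). With this change the prolog recovers the fixed-width value with llvm.experimental.vector.extract instead of spilling the scalable argument to the stack and reloading it:

#include <arm_sve.h>
#define N __ARM_FEATURE_SVE_BITS
typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

// x and y arrive as <vscale x 4 x i32> values per the VLAT ABI; the body
// operates on the fixed-width view extracted in the prolog.
fixed_int32_t add(fixed_int32_t x, fixed_int32_t y) { return x + y; }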
[llvm-branch-commits] [clang] 8ea72b3 - [clang][AArch64][SVE] Avoid going through memory for coerced VLST return values
Author: Joe Ellis Date: 2021-01-11T12:10:59Z New Revision: 8ea72b388734ce660f861e0dfbe53d203e94876a URL: https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a DIFF: https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a.diff LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST return values VLST return values are coerced to VLATs in the function epilog for consistency with the VLAT ABI. Previously, this coercion was done through memory. It is preferable to use the llvm.experimental.vector.insert intrinsic to avoid going through memory here. Reviewed By: c-rhodes Differential Revision: https://reviews.llvm.org/D94290 Added: Modified: clang/lib/CodeGen/CGCall.cpp clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp clang/test/CodeGen/attr-arm-sve-vector-bits-call.c clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c Removed: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index f1987408165b..2cc7203d1194 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1265,6 +1265,21 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, return CGF.Builder.CreateLoad(Src); } + // If coercing a fixed vector to a scalable vector for ABI compatibility, and + // the types match, use the llvm.experimental.vector.insert intrinsic to + // perform the conversion. + if (auto *ScalableDst = dyn_cast(Ty)) { +if (auto *FixedSrc = dyn_cast(SrcTy)) { + if (ScalableDst->getElementType() == FixedSrc->getElementType()) { +auto *Load = CGF.Builder.CreateLoad(Src); +auto *UndefVec = llvm::UndefValue::get(ScalableDst); +auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); +return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero, + "castScalableSve"); + } +} + } + // Otherwise do coercion through memory. This is stupid, but simple. 
Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(), Src.getName()); diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp index 6fafc2ca2db9..a808d50884ea 100644 --- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp +++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp @@ -48,14 +48,11 @@ void test02() { // CHECK-SAME:[[#VBITS]] // CHECK-SAME:EES_( %x.coerce, %y.coerce) // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[X:%.*]] = call <[[#div(VBITS, 32)]] x i32> @llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32( [[X_COERCE:%.*]], i64 0) // CHECK-NEXT: [[Y:%.*]] = call <[[#div(VBITS, 32)]] x i32> @llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32( [[X_COERCE1:%.*]], i64 0) // CHECK-NEXT: [[ADD:%.*]] = add <[[#div(VBITS, 32)]] x i32> [[Y]], [[X]] -// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <[[#div(VBITS, 32)]] x i32>* -// CHECK-NEXT: store <[[#div(VBITS, 32)]] x i32> [[ADD]], <[[#div(VBITS, 32)]] x i32>* [[RETVAL_0__SROA_CAST]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , * [[RETVAL_COERCE]], align 16 -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.experimental.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32( undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] typedef svint32_t vec __attribute__((arm_sve_vector_bits(N))); auto f(vec x, vec y) { return x + y; } // Returns a vec. #endif diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c index f909b7164622..f988d54bacd4 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -24,14 +24,11 @@ svint32_t sizeless_callee(svint32_t x) { // CHECK-LABEL: @fixed_caller( // CHECK-NEXT: entry: -// CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT:[[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( [[X_COERCE:%.*]], i64 0) // CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call @llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x i32> [[X]], i64 0) // CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( [[CASTSCALABLESVE]], i64 0) -// CHECK-NEXT:[[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* -// CHECK-NEXT:store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 -// C
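The mirror image of the previous patch, shown at the source level (mirroring the fixed_caller/sizeless_callee functions from the test above): the fixed-width result is now widened back to the scalable return type with llvm.experimental.vector.insert rather than stored to a retval alloca and reloaded:

#include <arm_sve.h>
#define N __ARM_FEATURE_SVE_BITS
typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

svint32_t sizeless_callee(svint32_t x) { return x; }

// The epilog builds the scalable return value with a single
// llvm.experimental.vector.insert at index 0.
fixed_int32_t fixed_caller(fixed_int32_t x) {
  return sizeless_callee(x);
}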
[llvm-branch-commits] [llvm] 0073582 - [DAGCombiner] Use getVectorElementCount inside visitINSERT_SUBVECTOR
Author: Joe Ellis Date: 2021-01-11T14:15:11Z New Revision: 007358239decd45819a6fa44eb2a2e07fd85e796 URL: https://github.com/llvm/llvm-project/commit/007358239decd45819a6fa44eb2a2e07fd85e796 DIFF: https://github.com/llvm/llvm-project/commit/007358239decd45819a6fa44eb2a2e07fd85e796.diff LOG: [DAGCombiner] Use getVectorElementCount inside visitINSERT_SUBVECTOR This avoids TypeSize-/ElementCount-related warnings. Differential Revision: https://reviews.llvm.org/D92747 Added: llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll Modified: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index be57d9250db7..75dbc2227f6a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21008,8 +21008,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0).getOperand(1) == N2 && - N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == - VT.getVectorNumElements() && + N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() == + VT.getVectorElementCount() && N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == VT.getSizeInBits()) { return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); @@ -21026,7 +21026,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { EVT CN1VT = CN1.getValueType(); if (CN0VT.isVector() && CN1VT.isVector() && CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && -CN0VT.getVectorNumElements() == VT.getVectorNumElements()) { +CN0VT.getVectorElementCount() == VT.getVectorElementCount()) { SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), CN0.getValueType(), CN0, CN1, N2); return DAG.getBitcast(VT, NewINSERT); @@ -21107,8 +21107,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { // If the input vector is a concatenation, and the insert replaces // one of the pieces, we can optimize into a single concat_vectors. if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && - N0.getOperand(0).getValueType() == N1.getValueType()) { -unsigned Factor = N1.getValueType().getVectorNumElements(); + N0.getOperand(0).getValueType() == N1.getValueType() && + N0.getOperand(0).getValueType().isScalableVector() == + N1.getValueType().isScalableVector()) { +unsigned Factor = N1.getValueType().getVectorMinNumElements(); SmallVector Ops(N0->op_begin(), N0->op_end()); Ops[InsIdx / Factor] = N1; return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); diff --git a/llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll b/llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll new file mode 100644 index ..a89e3a09c1f1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -o /dev/null 2>&1 | FileCheck --allow-empty %s + +; This regression test is defending against a ElementCount warning 'Possible incorrect use of +; EVT::getVectorNumElements() for scalable vector'. This warning appeared in +; DAGCombiner::visitINSERT_SUBVECTOR because of the use of getVectorNumElements() on scalable +; types. + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; CHECK-NOT: warning:
+
+target triple = "aarch64-unknown-linux-gnu"
+attributes #0 = {"target-features"="+sve"}
+
+declare <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float>, i64)
+declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)
+
+define <vscale x 2 x double> @reproducer_one(<vscale x 4 x float> %vec_a) #0 {
+  %a = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> %vec_a, i64 0)
+  %b = bitcast <16 x float> %a to <8 x double>
+  %retval = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> %b, i64 0)
+  ret <vscale x 2 x double> %retval
+}
+
+define <vscale x 2 x double> @reproducer_two(<4 x double> %a, <4 x double> %b) #0 {
+  %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %retval = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> %concat, i64 0)
+  ret <vscale x 2 x double> %retval
+}
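A small sketch of why the comparison must be made on ElementCount values rather than plain element counts (illustrative only; the helper below is not part of the patch):

#include "llvm/Support/TypeSize.h"

// An ElementCount is a {known-minimum lanes, scalable} pair. Comparing whole
// ElementCounts keeps <4 x i32> and <vscale x 4 x i32> distinct, which a bare
// getVectorNumElements() result cannot do for scalable types.
bool sameLaneCount(llvm::ElementCount A, llvm::ElementCount B) {
  return A == B;
}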
[llvm-branch-commits] [clang] 5a2a836 - [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 intrinsics
Author: Joe Ellis Date: 2020-12-15T17:19:16Z New Revision: 5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf URL: https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf DIFF: https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf.diff LOG: [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 intrinsics Prior to this patch, Clang supported the following C/C++ intrinsics: vceqz_p16 vceqzq_p16 vmlaq_n_f64 vmlsq_n_f64 ... exposed through arm_neon.h. However, these intrinsics are not part of the ACLE, allowing developers to write code that is not compatible with other toolchains. This patch removes these intrinsics. There is a bug report capturing this issue here: https://bugs.llvm.org/show_bug.cgi?id=47471 Reviewed By: bsmith Differential Revision: https://reviews.llvm.org/D93206 Added: Modified: clang/include/clang/Basic/arm_neon.td clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-misc.c Removed: diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 4d4e42dd514b..6f1380d58c16 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -786,9 +786,6 @@ def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>; def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>; def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>; -def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>; -def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>; - // Logical operations def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">; @@ -868,7 +865,7 @@ def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>; def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>; def CMEQ : SInst<"vceqz", "U.", - "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; + "csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">; def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">; def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">; def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">; diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c index c2dd315ed9fc..0726218d2e15 100644 --- a/clang/test/CodeGen/aarch64-neon-fma.c +++ b/clang/test/CodeGen/aarch64-neon-fma.c @@ -26,16 +26,6 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { return vmlaq_n_f32(a, b, c); } -// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 { -// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 -// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 -// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]] -// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]] -// CHECK: ret <2 x double> [[ADD_I]] -float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { - return vmlaq_n_f64(a, b, c); -} - // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 { // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 @@ -58,16 +48,6 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { return vmls_n_f32(a, b, c); } -// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 { -// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 -// CHECK: [[VECINIT1_I:%.*]] = insertelement 
<2 x double> [[VECINIT_I]], double %c, i32 1 -// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]] -// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]] -// CHECK: ret <2 x double> [[SUB_I]] -float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { - return vmlsq_n_f64(a, b, c); -} - // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { // CHECK:[[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> // CHECK:[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c index 88020a1a69c2..5517fe3dc411 100644 --- a/clang/test/CodeGen/aarch64-neon-misc.c +++ b/clang/test/CodeGen/aarch64-neon-misc.c @@ -198,24 +198,6 @@ uint8x16_t test_vceqzq_p8(poly8x16_t a) { return vceqzq_p8(a); } -// CHECK-LABEL: @test_vceqz_p16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer -// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4
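Code that used the removed intrinsics can be rewritten with documented ACLE operations; the replacements below are suggestions under that assumption (vmlaq_f64, vdupq_n_f64, vreinterpretq_u16_p16 and vceqzq_u16 are the documented forms), not part of the patch:

#include <arm_neon.h>

// vmlaq_n_f64(a, b, c): splat the scalar, then use the vector form.
float64x2_t mla_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vmlaq_f64(a, b, vdupq_n_f64(c));
}

// vceqzq_p16: reinterpret the polynomial vector as unsigned and compare.
uint16x8_t ceqz_p16(poly16x8_t a) {
  return vceqzq_u16(vreinterpretq_u16_p16(a));
}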
[llvm-branch-commits] [clang] dad07ba - [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts
Author: Joe Ellis Date: 2020-12-16T12:24:32Z New Revision: dad07baf123e672b1d5d5e7c21e73b92399d5a0c URL: https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c DIFF: https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c.diff LOG: [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts This change makes use of the llvm.vector.extract intrinsic to avoid going through memory when performing bitcasts between vector-length agnostic types and vector-length specific types. Depends on D91362 Reviewed By: c-rhodes Differential Revision: https://reviews.llvm.org/D92761 Added: Modified: clang/lib/CodeGen/CGExprScalar.cpp clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c clang/test/CodeGen/attr-arm-sve-vector-bits-call.c clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c llvm/include/llvm/IR/IRBuilder.h Removed: diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 973cefd831e6..c9cf1d0dfd89 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1996,7 +1996,39 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } } +// If Src is a fixed vector and Dst is a scalable vector, and both have the +// same element type, use the llvm.experimental.vector.insert intrinsic to +// perform the bitcast. +if (const auto *FixedSrc = dyn_cast(SrcTy)) { + if (const auto *ScalableDst = dyn_cast(DstTy)) { +if (FixedSrc->getElementType() == ScalableDst->getElementType()) { + llvm::Value *UndefVec = llvm::UndefValue::get(DstTy); + llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); + return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero, +"castScalableSve"); +} + } +} + +// If Src is a scalable vector and Dst is a fixed vector, and both have the +// same element type, use the llvm.experimental.vector.extract intrinsic to +// perform the bitcast. +if (const auto *ScalableSrc = dyn_cast(SrcTy)) { + if (const auto *FixedDst = dyn_cast(DstTy)) { +if (ScalableSrc->getElementType() == FixedDst->getElementType()) { + llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); + return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve"); +} + } +} + // Perform VLAT <-> VLST bitcast through memory. +// TODO: since the llvm.experimental.vector.{insert,extract} intrinsics +// require the element types of the vectors to be the same, we +// need to keep this around for casting between predicates, or more +// generally for bitcasts between VLAT <-> VLST where the element +// types of the vectors are not the same, until we figure out a better +// way of doing these casts. 
if ((isa(SrcTy) && isa(DstTy)) || (isa(SrcTy) && diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c index fed7708c6893..beba6a3f0199 100644 --- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c +++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c @@ -51,34 +51,22 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0, typedef int8_t vec_int8 __attribute__((vector_size(N / 8))); // CHECK128-LABEL: define <16 x i8> @f2(<16 x i8> %x) // CHECK128-NEXT: entry: -// CHECK128-NEXT:%x.addr = alloca <16 x i8>, align 16 -// CHECK128-NEXT:%saved-call-rvalue = alloca , align 16 -// CHECK128-NEXT:store <16 x i8> %x, <16 x i8>* %x.addr, align 16 -// CHECK128-NEXT:%0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CHECK128-NEXT:%1 = bitcast <16 x i8>* %x.addr to * -// CHECK128-NEXT:%2 = load , * %1, align 16 -// CHECK128-NEXT:%3 = call @llvm.aarch64.sve.asrd.nxv16i8( %0, %2, i32 1) -// CHECK128-NEXT:store %3, * %saved-call-rvalue, align 16 -// CHECK128-NEXT:%castFixedSve = bitcast * %saved-call-rvalue to <16 x i8>* -// CHECK128-NEXT:%4 = load <16 x i8>, <16 x i8>* %castFixedSve, align 16 -// CHECK128-NEXT:ret <16 x i8> %4 +// CHECK128-NEXT:[[TMP0:%.*]] = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +// CHECK128-NEXT:[[CASTSCALABLESVE:%.*]] = call @llvm.experimental.vector.insert.nxv16i8.v16i8( undef, <16 x i8> [[X:%.*]], i64 0) +// CHECK128-NEXT:[[TMP1:%.*]] = call @llvm.aarch64.sve.asrd.nxv16i8( [[TMP0]], [[CASTSCALABLESVE]], i32 1) +// CHECK128-NEXT:
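At the source level the affected casts look as follows (assumed usage; the function names are hypothetical). With matching element types, both directions now avoid the store/load round-trip, while predicate casts keep going through memory as noted in the TODO above:

#include <arm_sve.h>
#define N __ARM_FEATURE_SVE_BITS
typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));

// VLAT -> VLST: now lowered with llvm.experimental.vector.extract.
fixed_float32_t to_fixed(svfloat32_t x) { return x; }

// VLST -> VLAT: now lowered with llvm.experimental.vector.insert.
svfloat32_t to_sizeless(fixed_float32_t x) { return x; }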
[llvm-branch-commits] [clang] 3c696a2 - [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors
Author: Joe Ellis Date: 2020-11-23T10:47:17Z New Revision: 3c696a212ba4328e4f8f92136bc4d728a6490ef7 URL: https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7 DIFF: https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7.diff LOG: [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors Previously, lax conversions were only allowed between SVE vector-length agnostic types and vector-length specific types. This meant that code such as the following: #include #define N __ARM_FEATURE_SVE_BITS #define FIXED_ATTR __attribute__ ((vector_size (N/8))) typedef float fixed_float32_t FIXED_ATTR; void foo() { fixed_float32_t fs32; svfloat64_t s64; fs32 = s64; } was not allowed. This patch makes a minor change to areLaxCompatibleSveTypes to allow for lax conversions to be performed between SVE vector-length agnostic types and GNU vectors. Differential Revision: https://reviews.llvm.org/D91696 Added: Modified: clang/lib/AST/ASTContext.cpp clang/test/Sema/aarch64-sve-lax-vector-conversions.c clang/test/Sema/attr-arm-sve-vector-bits.c clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp Removed: diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index f54916babed7..67ee8c0956d6 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -8586,10 +8586,20 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType, const auto *VecTy = SecondType->getAs(); if (VecTy && -VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector) { +(VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector || + VecTy->getVectorKind() == VectorType::GenericVector)) { const LangOptions::LaxVectorConversionKind LVCKind = getLangOpts().getLaxVectorConversions(); + // If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion. + // "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly + // converts to VLAT and VLAT implicitly converts to GNUT." + // ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and + // predicates. + if (VecTy->getVectorKind() == VectorType::GenericVector && + getTypeSize(SecondType) != getLangOpts().ArmSveVectorBits) +return false; + // If -flax-vector-conversions=all is specified, the types are // certainly compatible. if (LVCKind == LangOptions::LaxVectorConversionKind::All) diff --git a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c index e2fe87f7dd20..1a1addcf1c1b 100644 --- a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c +++ b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c @@ -7,32 +7,61 @@ #include #define N __ARM_FEATURE_SVE_BITS -#define FIXED_ATTR __attribute__((arm_sve_vector_bits(N))) +#define SVE_FIXED_ATTR __attribute__((arm_sve_vector_bits(N))) +#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8))) -typedef svfloat32_t fixed_float32_t FIXED_ATTR; -typedef svint32_t fixed_int32_t FIXED_ATTR; +typedef svfloat32_t sve_fixed_float32_t SVE_FIXED_ATTR; +typedef svint32_t sve_fixed_int32_t SVE_FIXED_ATTR; +typedef float gnu_fixed_float32_t GNU_FIXED_ATTR; +typedef int gnu_fixed_int32_t GNU_FIXED_ATTR; -void allowed_with_integer_lax_conversions() { - fixed_int32_t fi32; +void sve_allowed_with_integer_lax_conversions() { + sve_fixed_int32_t fi32; svint64_t si64; // The implicit cast here should fail if -flax-vector-conversions=none, but pass if // -flax-vector-conversions={integer,all}. 
fi32 = si64; - // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 'int' values) from incompatible type}} + // lax-vector-none-error@-1 {{assigning to 'sve_fixed_int32_t' (vector of 16 'int' values) from incompatible type}} si64 = fi32; // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') from incompatible type}} } -void allowed_with_all_lax_conversions() { - fixed_float32_t ff32; +void sve_allowed_with_all_lax_conversions() { + sve_fixed_float32_t ff32; svfloat64_t sf64; // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if // -flax-vector-conversions=all. ff32 = sf64; - // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}} - // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}} + // lax-vector-none-error@-1 {{assigning to 'sve_fixed_float32_t' (vector of 16 'float' values) from incompatible type}} + // lax-vector-integer-error@-2 {{assigning to 'sve_fixed_float32_t' (vector of 16 'float' values) from incompatible type}} + sf64 = ff32; + //
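A compact example of what becomes accepted (assuming -msve-vector-bits=N and -flax-vector-conversions=all, as in the tests above; the function name is hypothetical):

#include <arm_sve.h>
#define N __ARM_FEATURE_SVE_BITS
typedef float gnu_float32_t __attribute__((vector_size(N / 8)));

// A GNU vector of exactly N bits may now convert laxly to and from an SVE
// sizeless type; a vector_size other than N/8 bytes is still rejected.
void lax_example(svfloat64_t s64) {
  gnu_float32_t g32;
  g32 = s64;
  s64 = g32;
}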
[llvm-branch-commits] [llvm] 06654a5 - [SVE] Fix TypeSize warning in RuntimePointerChecking::insert
Author: Joe Ellis Date: 2020-11-25T16:59:03Z New Revision: 06654a5348bfc510208514a30c552f4f2c4c0ee7 URL: https://github.com/llvm/llvm-project/commit/06654a5348bfc510208514a30c552f4f2c4c0ee7 DIFF: https://github.com/llvm/llvm-project/commit/06654a5348bfc510208514a30c552f4f2c4c0ee7.diff LOG: [SVE] Fix TypeSize warning in RuntimePointerChecking::insert The TypeSize warning would occur because RuntimePointerChecking::insert was not scalable vector aware. The fix is to use ScalarEvolution::getSizeOfExpr to grab the size of types. Differential Revision: https://reviews.llvm.org/D90171 Added: llvm/test/Analysis/LoopAccessAnalysis/memcheck-store-vs-alloc-size.ll llvm/test/Analysis/LoopAccessAnalysis/runtime-pointer-checking-insert-typesize.ll Modified: llvm/include/llvm/Analysis/ScalarEvolution.h llvm/lib/Analysis/LoopAccessAnalysis.cpp llvm/lib/Analysis/ScalarEvolution.cpp Removed: diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 9c19ec986444..a7a24f086fbe 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -600,9 +600,17 @@ class ScalarEvolution { return getConstant(Ty, -1, /*isSigned=*/true); } - /// Return an expression for sizeof AllocTy that is type IntTy + /// Return an expression for sizeof ScalableTy that is type IntTy, where + /// ScalableTy is a scalable vector type. + const SCEV *getSizeOfScalableVectorExpr(Type *IntTy, + ScalableVectorType *ScalableTy); + + /// Return an expression for the alloc size of AllocTy that is type IntTy const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); + /// Return an expression for the store size of StoreTy that is type IntTy + const SCEV *getStoreSizeOfExpr(Type *IntTy, Type *StoreTy); + /// Return an expression for offsetof on the given field with type IntTy const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 19a8ea23b70b..d37c07801b2e 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -224,9 +224,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, } // Add the size of the pointed element to ScEnd. 
auto &DL = Lp->getHeader()->getModule()->getDataLayout(); -unsigned EltSize = -DL.getTypeStoreSizeInBits(Ptr->getType()->getPointerElementType()) / 8; -const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize); +Type *IdxTy = DL.getIndexType(Ptr->getType()); +const SCEV *EltSizeSCEV = +SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType()); ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 5f77f4aa05c2..5a7f1b94a4e8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3660,28 +3660,42 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { return getMinMaxExpr(scUMinExpr, Ops); } +const SCEV * +ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy, + ScalableVectorType *ScalableTy) { + Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo()); + Constant *One = ConstantInt::get(IntTy, 1); + Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One); + // Note that the expression we created is the final expression, we don't + // want to simplify it any further Also, if we call a normal getSCEV(), + // we'll end up in an endless recursion. So just create an SCEVUnknown. + return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy)); +} + const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { - if (isa(AllocTy)) { -Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo()); -Constant *One = ConstantInt::get(IntTy, 1); -Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One); -// Note that the expression we created is the final expression, we don't -// want to simplify it any further Also, if we call a normal getSCEV(), -// we'll end up in an endless recursion. So just create an SCEVUnknown. -return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy)); - } - // We can bypass creating a target-independent - // constant expression and then folding it back into a ConstantInt. - // This is just a compile-time optimization. + if (auto *ScalableAllocTy = dyn_cast(AllocTy)) +return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy); + // We can bypass creating a target-independent constant expression and then + // folding it back into a ConstantInt. This is just a compile
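The underlying size query behaves as sketched below (illustrative helper, not from the patch): for a scalable element type the store size is a multiple of vscale, so it cannot be folded into a plain constant and the access bound has to be built as a symbolic SCEV instead:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

// For <vscale x 4 x i32> this returns a scalable TypeSize of 16 bytes per
// unit of vscale, known only at run time.
llvm::TypeSize storeSizeOf(const llvm::DataLayout &DL, llvm::Type *ElemTy) {
  return DL.getTypeStoreSize(ElemTy);
}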
[llvm-branch-commits] [llvm] f6150aa - [SelectionDAGBuilder] Update signature of `getRegsAndSizes()`.
Author: Francesco Petrogalli Date: 2020-11-30T17:38:51Z New Revision: f6150aa41a48ac8b5372fe4d6ccdfff96e432431 URL: https://github.com/llvm/llvm-project/commit/f6150aa41a48ac8b5372fe4d6ccdfff96e432431 DIFF: https://github.com/llvm/llvm-project/commit/f6150aa41a48ac8b5372fe4d6ccdfff96e432431.diff LOG: [SelectionDAGBuilder] Update signature of `getRegsAndSizes()`. The mapping between registers and relative size has been updated to use TypeSize to account for the size of scalable EVTs. The patch is a NFCI, if not for the fact that with this change the function `getUnderlyingArgRegs` does not raise a warning for implicit conversion of `TypeSize` to `unsigned` when generating machine code from the test added to the patch. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D92096 Added: llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll Modified: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index dd5beb33ecce..9d2174f4b85a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -980,14 +980,14 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -SmallVector, 4> +SmallVector, 4> RegsForValue::getRegsAndSizes() const { - SmallVector, 4> OutVec; + SmallVector, 4> OutVec; unsigned I = 0; for (auto CountAndVT : zip_first(RegCount, RegVTs)) { unsigned RegCount = std::get<0>(CountAndVT); MVT RegisterVT = std::get<1>(CountAndVT); -unsigned RegisterSize = RegisterVT.getSizeInBits(); +TypeSize RegisterSize = RegisterVT.getSizeInBits(); for (unsigned E = I + RegCount; I != E; ++I) OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); } @@ -5317,7 +5317,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of static void -getUnderlyingArgRegs(SmallVectorImpl> &Regs, +getUnderlyingArgRegs(SmallVectorImpl> &Regs, const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: { @@ -5428,7 +5428,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits::max()) Op = MachineOperand::CreateFI(FI); - SmallVector, 8> ArgRegsAndSizes; + SmallVector, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { getUnderlyingArgRegs(ArgRegsAndSizes, N); Register Reg; @@ -5458,8 +5458,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Op) { // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg -auto splitMultiRegDbgValue - = [&](ArrayRef> SplitRegs) { +auto splitMultiRegDbgValue = [&](ArrayRef> + SplitRegs) { unsigned Offset = 0; for (auto RegAndSize : SplitRegs) { // If the expression is already a fragment, the current register diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 89b5de0a9f21..bf2023674342 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -895,7 +895,7 @@ struct RegsForValue { } /// Return a list of registers and their sizes. 
- SmallVector, 4> getRegsAndSizes() const; + SmallVector, 4> getRegsAndSizes() const; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll b/llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll new file mode 100644 index ..5a519bea2a70 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read +; clang/test/CodeGen/aarch64-sve-intrinsics/README for instructions on +; how to resolve it. + +; WARN-NOT: warning + +; CHECK-LABEL: do_something: +define @do_something( %vx) { +entry: + call void @llvm.dbg.value(metadata %vx, metadata !3, metadata !DIExpression()), !dbg !5 + %0 = tail call @f( %vx) + ret %0 +} + +declare @f() + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1) +!1 = !DIFile(filename: "file.c", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !DILocalVariable(scope: !4) +!4 = distinct !DISubprogram(unit: !0) +!5 = !DILocation(scope: !4) __
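A brief sketch of the hazard the TypeSize-based pair avoids (illustrative only; the helper is hypothetical): narrowing a register size to unsigned silently drops the scalable flag, whereas TypeSize lets each caller decide explicitly:

#include "llvm/Support/TypeSize.h"
#include <cstdint>

uint64_t minRegisterSizeInBits(llvm::TypeSize RegisterSize) {
  // For scalable registers this is only a lower bound (a multiple of vscale).
  return RegisterSize.isScalable() ? RegisterSize.getKnownMinSize()
                                   : RegisterSize.getFixedSize();
}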
[llvm-branch-commits] [llvm] 78c0ea5 - [DAGCombine] Fix TypeSize warning in DAGCombine::visitLIFETIME_END
Author: Joe Ellis Date: 2020-12-03T12:12:41Z New Revision: 78c0ea54a22aea5c3ff9030b66d2e1c50ca2c1a3 URL: https://github.com/llvm/llvm-project/commit/78c0ea54a22aea5c3ff9030b66d2e1c50ca2c1a3 DIFF: https://github.com/llvm/llvm-project/commit/78c0ea54a22aea5c3ff9030b66d2e1c50ca2c1a3.diff LOG: [DAGCombine] Fix TypeSize warning in DAGCombine::visitLIFETIME_END Bail out early if we encounter a scalable store. Reviewed By: peterwaller-arm Differential Revision: https://reviews.llvm.org/D92392 Added: llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll Modified: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 661e05bdd579..6ce6e1093dc6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17607,11 +17607,16 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { // TODO: Can relax for unordered atomics (see D66309) if (!ST->isSimple() || ST->isIndexed()) continue; + const TypeSize StoreSize = ST->getMemoryVT().getStoreSize(); + // The bounds of a scalable store are not known until runtime, so this + // store cannot be elided. + if (StoreSize.isScalable()) +continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, // we can remove the store. if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, - ST->getMemoryVT().getStoreSizeInBits())) { + StoreSize.getFixedSize() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); diff --git a/llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll b/llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll new file mode 100644 index ..fd5b85a57de1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=aarch64-- < %s 2>&1 | FileCheck --allow-empty %s + +; This regression test is defending against a TypeSize warning 'assumption that TypeSize is not +; scalable'. This warning appeared in DAGCombiner::visitLIFETIME_END when visiting a LIFETIME_END +; node linked to a scalable store. + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; CHECK-NOT: warning: {{.*}}TypeSize is not scalable + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +define void @foo(* nocapture dereferenceable(16) %ptr) { +entry: + %tmp = alloca , align 8 + %tmp_ptr = bitcast * %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp_ptr) + store undef, * %ptr + call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp_ptr) + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
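The essence of the guard, distilled (illustrative helper mirroring the intent of the change, not the patch itself):

#include "llvm/CodeGen/SelectionDAGNodes.h"

// A scalable store covers vscale * (minimum size) bytes, so it can never be
// proven to lie entirely within the object whose lifetime is ending and must
// not be deleted by this combine.
static bool storeBoundsKnownAtCompileTime(const llvm::StoreSDNode *ST) {
  return !ST->getMemoryVT().getStoreSize().isScalable();
}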
[llvm-branch-commits] [llvm] 80c33de - [SelectionDAG] Add llvm.vector.{extract,insert} intrinsics
Author: Joe Ellis Date: 2020-12-09T11:08:41Z New Revision: 80c33de2d3c59ca357c67b2b2475d27f79dd8a8b URL: https://github.com/llvm/llvm-project/commit/80c33de2d3c59ca357c67b2b2475d27f79dd8a8b DIFF: https://github.com/llvm/llvm-project/commit/80c33de2d3c59ca357c67b2b2475d27f79dd8a8b.diff LOG: [SelectionDAG] Add llvm.vector.{extract,insert} intrinsics This commit adds two new intrinsics. - llvm.experimental.vector.insert: used to insert a vector into another vector starting at a given index. - llvm.experimental.vector.extract: used to extract a subvector from a larger vector starting from a given index. The codegen work for these intrinsics has already been completed; this commit is simply exposing the existing ISD nodes to LLVM IR. Reviewed By: cameron.mcinally Differential Revision: https://reviews.llvm.org/D91362 Added: llvm/test/CodeGen/AArch64/sve-extract-vector.ll llvm/test/CodeGen/AArch64/sve-insert-vector.ll llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll llvm/test/Verifier/extract-vector-mismatched-element-types.ll llvm/test/Verifier/insert-vector-mismatched-element-types.ll Modified: llvm/docs/LangRef.rst llvm/include/llvm/IR/Intrinsics.td llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/IR/Verifier.cpp llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp Removed: diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 6724a4019030..b29eb589e2d7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16095,6 +16095,81 @@ Arguments: "" The argument to this intrinsic must be a vector of floating-point values. +'``llvm.experimental.vector.insert``' Intrinsic +^^^ + +Syntax: +""" +This is an overloaded intrinsic. You can use ``llvm.experimental.vector.insert`` +to insert a fixed-width vector into a scalable vector, but not the other way +around. + +:: + + declare @llvm.experimental.vector.insert.v4f32( %vec, <4 x float> %subvec, i64 %idx) + declare @llvm.experimental.vector.insert.v2f64( %vec, <2 x double> %subvec, i64 %idx) + +Overview: +" + +The '``llvm.experimental.vector.insert.*``' intrinsics insert a vector into another vector +starting from a given index. The return type matches the type of the vector we +insert into. Conceptually, this can be used to build a scalable vector out of +non-scalable vectors. + +Arguments: +"" + +The ``vec`` is the vector which ``subvec`` will be inserted into. +The ``subvec`` is the vector that will be inserted. + +``idx`` represents the starting element number at which ``subvec`` will be +inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum +vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by +the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at +``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` + +num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition +cannot be determined statically but is false at runtime, then the result vector +is undefined. + + +'``llvm.experimental.vector.extract``' Intrinsic + + +Syntax: +""" +This is an overloaded intrinsic. You can use +``llvm.experimental.vector.extract`` to extract a fixed-width vector from a +scalable vector, but not the other way around. 
+ +:: + + declare <4 x float> @llvm.experimental.vector.extract.v4f32( %vec, i64 %idx) + declare <2 x double> @llvm.experimental.vector.extract.v2f64( %vec, i64 %idx) + +Overview: +" + +The '``llvm.experimental.vector.extract.*``' intrinsics extract a vector from +within another vector starting from a given index. The return type must be +explicitly specified. Conceptually, this can be used to decompose a scalable +vector into non-scalable parts. + +Arguments: +"" + +The ``vec`` is the vector from which we will extract a subvector. + +The ``idx`` specifies the starting element number within ``vec`` from which a +subvector is extracted. ``idx`` must be a constant multiple of the known-minimum +vector length of the result type. If the result type is a scalable vector, +``idx`` is first scaled by the result type's runtime scaling factor. Elements +``idx`` through (``idx`` + num_elements(result_type) - 1) must be valid vector +indices. If this condition cannot be determined statically but is false at +runtime, then the result vector is undefined. The ``idx`` parameter must be a +vector index constant type (for most targets this will be an integer pointer +type). + Matrix Intrinsics - diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 710479103459..eb6c408b4f85 100644 --- a/llv
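From C++, the new intrinsics are reachable through the IRBuilder helpers that the clang patches above rely on (CreateExtractVector and CreateInsertVector); the round-trip helper below is a hypothetical sketch:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Extract the low 4 x i32 lanes of a <vscale x 4 x i32> value, then insert
// them back at index 0 of an undef scalable vector.
Value *roundTrip(IRBuilder<> &B, Value *Scalable) {
  LLVMContext &C = B.getContext();
  auto *FixedTy = FixedVectorType::get(Type::getInt32Ty(C), 4);
  auto *ScalableTy = ScalableVectorType::get(Type::getInt32Ty(C), 4);
  Value *Zero = ConstantInt::get(Type::getInt64Ty(C), 0);
  Value *Fixed = B.CreateExtractVector(FixedTy, Scalable, Zero, "lo");
  return B.CreateInsertVector(ScalableTy, UndefValue::get(ScalableTy), Fixed,
                              Zero, "widened");
}

As the LangRef text above requires, the index passed to both intrinsics must be a constant multiple of the subvector's known-minimum length (0 here).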
[llvm-branch-commits] [llvm] d863a0d - [SelectionDAG] Implement SplitVecOp_INSERT_SUBVECTOR
Author: Joe Ellis Date: 2020-12-11T11:07:59Z New Revision: d863a0ddebc889af31b8f729103e9d965a40a495 URL: https://github.com/llvm/llvm-project/commit/d863a0ddebc889af31b8f729103e9d965a40a495 DIFF: https://github.com/llvm/llvm-project/commit/d863a0ddebc889af31b8f729103e9d965a40a495.diff LOG: [SelectionDAG] Implement SplitVecOp_INSERT_SUBVECTOR This function is needed for when it is necessary to split the subvector operand of an llvm.experimental.vector.insert call. Splitting the subvector operand means performing two insertions: one inserting the lower part of the split subvector into the destination vector, and another for inserting the upper part. Through experimenting, it seems quite rare to need split the subvector operand, but this is necessary to avoid assertion errors. Differential Revision: https://reviews.llvm.org/D92760 Added: llvm/test/CodeGen/AArch64/split-vector-insert.ll Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index fed111f4d64e..aea2e9ba2bd5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -841,6 +841,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 86a1f6bff9f7..3c642df7ba11 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2062,6 +2062,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS:Res = SplitVecOp_CONCAT_VECTORS(N); break; case ISD::TRUNCATE: @@ -2278,6 +2279,32 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { JoinIntegers(Lo, Hi)); } +SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Invalid OpNo; can only split SubVec."); + // We know that the result type is legal. + EVT ResVT = N->getValueType(0); + + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + + SDValue Lo, Hi; + GetSplitVector(SubVec, Lo, Hi); + + uint64_t IdxVal = cast(Idx)->getZExtValue(); + uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); + + SDValue FirstInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx); + SDValue SecondInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi, + DAG.getVectorIdxConstant(IdxVal + LoElts, dl)); + + return SecondInsertion; +} + SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. 
EVT SubVT = N->getValueType(0); diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll new file mode 100644 index ..3fb86ae6b963 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -0,0 +1,115 @@ +; RUN: llc < %s -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION +; RUN: llc < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "aarch64-unknown-linux-gnu" +attributes #0 = {"target-features"="+sve"} + +declare @llvm.experimental.vector.insert.nxv2i64.v8i64(, <8 x i64>, i64) +declare @llvm.experimental.vector.insert.nxv2f64.v8f64(, <8 x double>, i64) + +define @test_nxv2i64_v8i64( %a, <8 x i64> %b) #0 { +; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0> +; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2> +; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4> +; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64