[clang] 5a2a836 - [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 intrinsics
Author: Joe Ellis
Date: 2020-12-15T17:19:16Z
New Revision: 5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf

URL: https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf
DIFF: https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf.diff

LOG: [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 intrinsics

Prior to this patch, Clang supported the following C/C++ intrinsics:

    vceqz_p16
    vceqzq_p16
    vmlaq_n_f64
    vmlsq_n_f64

... exposed through arm_neon.h. However, these intrinsics are not part
of the ACLE, so supporting them allows developers to write code that is
not compatible with other toolchains.

This patch removes these intrinsics.

There is a bug report capturing this issue here:

    https://bugs.llvm.org/show_bug.cgi?id=47471

Reviewed By: bsmith

Differential Revision: https://reviews.llvm.org/D93206

Added: 

Modified: 
    clang/include/clang/Basic/arm_neon.td
    clang/test/CodeGen/aarch64-neon-fma.c
    clang/test/CodeGen/aarch64-neon-misc.c

Removed: 

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 4d4e42dd514b..6f1380d58c16 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -786,9 +786,6 @@ def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>;
 def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>;
 def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>;
 
-def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>;
-def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>;
-
 // Logical operations
 def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">;
 
@@ -868,7 +865,7 @@ def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
 def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
 
 def CMEQ : SInst<"vceqz", "U.",
-                 "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">;
+                 "csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
 def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
 def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
 def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;

diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c
index c2dd315ed9fc..0726218d2e15 100644
--- a/clang/test/CodeGen/aarch64-neon-fma.c
+++ b/clang/test/CodeGen/aarch64-neon-fma.c
@@ -26,16 +26,6 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
   return vmlaq_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
-// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
-// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
-// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
-// CHECK:   ret <2 x double> [[ADD_I]]
-float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
-  return vmlaq_n_f64(a, b, c);
-}
-
 // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
@@ -58,16 +48,6 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
   return vmls_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
-// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
-// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
-// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
-// CHECK:   ret <2 x double> [[SUB_I]]
-float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
-  return vmlsq_n_f64(a, b, c);
-}
-
 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>

diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c
index 88020a1a69c2..5517fe3dc411 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -198,24 +198,6 @@ uint8x16_t test_vceqzq_p8(poly8x16_t a) {
   return vceqzq_p8(a);
 }
 
-// CHECK-LABEL: @test_vceqz_p16(
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4
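For code that used the removed intrinsics, equivalent results can be obtained with documented ACLE intrinsics. The sketch below is illustrative (the wrapper names are not from the patch); note that vmlaq_f64 with a duplicated scalar reproduces the old unfused multiply-add exactly, whereas switching to vfmaq_n_f64 would change results by fusing the operations:

    #include <arm_neon.h>

    /* Replacement for the removed vmlaq_n_f64(a, b, c): a + b * dup(c). */
    float64x2_t mla_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
      return vmlaq_f64(a, b, vdupq_n_f64(c));
    }

    /* Replacement for the removed vceqz_p16: reinterpret the polynomial
       vector as an unsigned integer vector and compare against zero. */
    uint16x4_t ceqz_p16(poly16x4_t a) {
      return vceqz_u16(vreinterpret_u16_p16(a));
    }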
[clang] dad07ba - [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts
Author: Joe Ellis
Date: 2020-12-16T12:24:32Z
New Revision: dad07baf123e672b1d5d5e7c21e73b92399d5a0c

URL: https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c
DIFF: https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts

This change makes use of the llvm.vector.extract intrinsic to avoid
going through memory when performing bitcasts between vector-length
agnostic types and vector-length specific types.

Depends on D91362

Reviewed By: c-rhodes

Differential Revision: https://reviews.llvm.org/D92761

Added: 

Modified: 
    clang/lib/CodeGen/CGExprScalar.cpp
    clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
    llvm/include/llvm/IR/IRBuilder.h

Removed: 

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 973cefd831e6..c9cf1d0dfd89 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1996,7 +1996,39 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       }
     }
 
+    // If Src is a fixed vector and Dst is a scalable vector, and both have the
+    // same element type, use the llvm.experimental.vector.insert intrinsic to
+    // perform the bitcast.
+    if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
+          llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+          llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+          return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero,
+                                            "castScalableSve");
+        }
+      }
+    }
+
+    // If Src is a scalable vector and Dst is a fixed vector, and both have the
+    // same element type, use the llvm.experimental.vector.extract intrinsic to
+    // perform the bitcast.
+    if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+          llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+          return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve");
+        }
+      }
+    }
+
     // Perform VLAT <-> VLST bitcast through memory.
+    // TODO: since the llvm.experimental.vector.{insert,extract} intrinsics
+    //       require the element types of the vectors to be the same, we
+    //       need to keep this around for casting between predicates, or more
+    //       generally for bitcasts between VLAT <-> VLST where the element
+    //       types of the vectors are not the same, until we figure out a
+    //       better way of doing these casts.
     if ((isa<llvm::FixedVectorType>(SrcTy) &&
          isa<llvm::ScalableVectorType>(DstTy)) ||
         (isa<llvm::ScalableVectorType>(SrcTy) &&

diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index fed7708c6893..beba6a3f0199 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -51,34 +51,22 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
 typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK128-LABEL: define <16 x i8> @f2(<16 x i8> %x)
 // CHECK128-NEXT: entry:
-// CHECK128-NEXT:   %x.addr = alloca <16 x i8>, align 16
-// CHECK128-NEXT:   %saved-call-rvalue = alloca <vscale x 16 x i8>, align 16
-// CHECK128-NEXT:   store <16 x i8> %x, <16 x i8>* %x.addr, align 16
-// CHECK128-NEXT:   %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK128-NEXT:   %1 = bitcast <16 x i8>* %x.addr to <vscale x 16 x i8>*
-// CHECK128-NEXT:   %2 = load <vscale x 16 x i8>, <vscale x 16 x i8>* %1, align 16
-// CHECK128-NEXT:   %3 = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %2, i32 1)
-// CHECK128-NEXT:   store <vscale x 16 x i8> %3, <vscale x 16 x i8>* %saved-call-rvalue, align 16
-// CHECK128-NEXT:   %castFixedSve = bitcast <vscale x 16 x i8>* %saved-call-rvalue to <16 x i8>*
-// CHECK128-NEXT:   %4 = load <16 x i8>, <16 x i8>* %castFixedSve, align 16
-// CHECK128-NEXT:   ret <16 x i8> %4
+// CHECK128-NEXT:   [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK128-NEXT:   [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[X:%.*]], i64 0)
+// CHECK128-NEXT:   [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT:
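At the C level, the casts affected are those between a VLAT and a VLST whose element types match. A minimal sketch, assuming a build where -msve-vector-bits matches __ARM_FEATURE_SVE_BITS (the function names are illustrative, not from the patch):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    /* Now lowered with llvm.experimental.vector.insert rather than a
       store/load round-trip through a stack slot. */
    svint32_t to_vlat(fixed_int32_t x) { return x; }

    /* Now lowered with llvm.experimental.vector.extract. */
    fixed_int32_t to_vlst(svint32_t x) { return x; }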
[clang] 23a96b8 - [AArch64][SVE] Support implicit lax vector conversions for SVE types
Author: Joe Ellis
Date: 2020-11-17T14:50:17Z
New Revision: 23a96b84a8d985b686a4e06dec1f7aebc0cca6c6

URL: https://github.com/llvm/llvm-project/commit/23a96b84a8d985b686a4e06dec1f7aebc0cca6c6
DIFF: https://github.com/llvm/llvm-project/commit/23a96b84a8d985b686a4e06dec1f7aebc0cca6c6.diff

LOG: [AArch64][SVE] Support implicit lax vector conversions for SVE types

Lax vector conversions were behaving incorrectly for implicit casts
between scalable and fixed-length vector types. For example, this:

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    #define FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
    typedef svfloat32_t fixed_float32_t FIXED_ATTR;

    void allowed_depending() {
      fixed_float32_t fs32;
      svfloat64_t s64;

      fs32 = s64;
    }

... would fail because the vectors have differing lane sizes. This
patch implements the correct behaviour for
-flax-vector-conversions={none,all,integer}. Specifically:

- -flax-vector-conversions=none prevents all lax vector conversions
  between scalable and fixed-sized vectors.
- -flax-vector-conversions=integer allows lax vector conversions between
  scalable and fixed-size vectors whose element types are integers.
- -flax-vector-conversions=all allows all lax vector conversions between
  scalable and fixed-size vectors (including those with floating point
  element types).

The implicit conversions are implemented as bitcasts.

Reviewed By: fpetrogalli

Differential Revision: https://reviews.llvm.org/D91067

Added: 
    clang/test/Sema/aarch64-sve-lax-vector-conversions.c
    clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Modified: 
    clang/include/clang/AST/ASTContext.h
    clang/lib/AST/ASTContext.cpp
    clang/lib/Sema/SemaExpr.cpp
    clang/lib/Sema/SemaOverload.cpp
    clang/test/Sema/attr-arm-sve-vector-bits.c

Removed: 

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 9fc9c924b51c..8c0930237583 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2088,6 +2088,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// vector-length.
   bool areCompatibleSveTypes(QualType FirstType, QualType SecondType);
 
+  /// Return true if the given vector types are lax-compatible SVE vector
+  /// types, false otherwise.
+  bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType);
+
   /// Return true if the type has been explicitly qualified with ObjC ownership.
   /// A type may be implicitly qualified with ownership under ObjC ARC, and in
   /// some cases the compiler treats these differently.

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index d63f299f021f..836065291fea 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -8574,6 +8574,41 @@ bool ASTContext::areCompatibleSveTypes(QualType FirstType,
          IsValidCast(SecondType, FirstType);
 }
 
+bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
+                                          QualType SecondType) {
+  assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
+          (FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
+         "Expected SVE builtin type and vector type!");
+
+  auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
+    if (!FirstType->getAs<BuiltinType>())
+      return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+    if (VecTy &&
+        VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector) {
+      const LangOptions::LaxVectorConversionKind LVCKind =
+          getLangOpts().getLaxVectorConversions();
+
+      // If -flax-vector-conversions=all is specified, the types are
+      // certainly compatible.
+      if (LVCKind == LangOptions::LaxVectorConversionKind::All)
+        return true;
+
+      // If -flax-vector-conversions=integer is specified, the types are
+      // compatible if the elements are integer types.
+      if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
+        return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
+               FirstType->getSveEltType(*this)->isIntegerType();
+    }
+
+    return false;
+  };
+
+  return IsLaxCompatible(FirstType, SecondType) ||
+         IsLaxCompatible(SecondType, FirstType);
+}
+
 bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
   while (true) {
     // __strong id

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0a25720c0f7b..e1a87ede3bdd 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -9003,12 +9003,13 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
   }
 
   // Allow assignments between fixed-length and sizeless SVE vectors.
-  if (((LHSType->isSizelessBuiltinType() && RHSType->isVectorType()) ||
-       (LHSType-
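The integer case can be exercised directly. A small illustration, assuming a translation unit built with -msve-vector-bits so that __ARM_FEATURE_SVE_BITS is defined (the typedef is assumed, not from the patch):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    void integer_lax_conversion(void) {
      fixed_int32_t fi32;
      svint64_t si64;

      /* Differing element types, but both integer: rejected under
         -flax-vector-conversions=none, accepted under =integer and =all. */
      fi32 = si64;
    }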
[clang] 1d7abcf - [AArch64][SVE] Add tests for VLST -> VLAT lax conversions
Author: Joe Ellis
Date: 2020-11-17T18:03:42Z
New Revision: 1d7abcf99e3d8f9ac7e24be5758da7cfef656400

URL: https://github.com/llvm/llvm-project/commit/1d7abcf99e3d8f9ac7e24be5758da7cfef656400
DIFF: https://github.com/llvm/llvm-project/commit/1d7abcf99e3d8f9ac7e24be5758da7cfef656400.diff

LOG: [AArch64][SVE] Add tests for VLST -> VLAT lax conversions

These were previously missing from the SVE lax conversions tests
introduced in this commit:

    23a96b84a8d985b686a4e06dec1f7aebc0cca6c6 (https://reviews.llvm.org/D91067)

Differential Revision: https://reviews.llvm.org/D91642

Added: 

Modified: 
    clang/test/Sema/aarch64-sve-lax-vector-conversions.c
    clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Removed: 

diff --git a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
index 68393275e54f..e2fe87f7dd20 100644
--- a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
+++ b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
@@ -20,6 +20,8 @@ void allowed_with_integer_lax_conversions() {
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') from incompatible type}}
 }
 
 void allowed_with_all_lax_conversions() {
@@ -31,4 +33,7 @@ void allowed_with_all_lax_conversions() {
   ff32 = sf64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
   // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'svfloat64_t' (aka '__SVFloat64_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'svfloat64_t' (aka '__SVFloat64_t') from incompatible type}}
 }

diff --git a/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp b/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
index 68393275e54f..e2fe87f7dd20 100644
--- a/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ b/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -20,6 +20,8 @@ void allowed_with_integer_lax_conversions() {
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') from incompatible type}}
 }
 
 void allowed_with_all_lax_conversions() {
@@ -31,4 +33,7 @@ void allowed_with_all_lax_conversions() {
   ff32 = sf64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
   // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'svfloat64_t' (aka '__SVFloat64_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'svfloat64_t' (aka '__SVFloat64_t') from incompatible type}}
 }
[clang] 1e2da38 - [AArch64][SVE] Allow C-style casts between fixed-size and scalable vectors
Author: Joe Ellis
Date: 2020-11-19T11:18:35Z
New Revision: 1e2da3839cc3543629ecb847fd3aa34edb64b42a

URL: https://github.com/llvm/llvm-project/commit/1e2da3839cc3543629ecb847fd3aa34edb64b42a
DIFF: https://github.com/llvm/llvm-project/commit/1e2da3839cc3543629ecb847fd3aa34edb64b42a.diff

LOG: [AArch64][SVE] Allow C-style casts between fixed-size and scalable vectors

This patch allows C-style casting between fixed-size and scalable
vectors. This kind of cast was previously blocked by the compiler, but
it should be allowed.

Differential Revision: https://reviews.llvm.org/D91262

Added: 
    clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
    clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp

Modified: 
    clang/include/clang/Sema/Sema.h
    clang/lib/Sema/SemaCast.cpp
    clang/lib/Sema/SemaExpr.cpp

Removed: 

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index fb95b8cbd193..20c7e8c2ed1b 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11501,6 +11501,8 @@ class Sema final {
   QualType CheckMatrixMultiplyOperands(ExprResult &LHS, ExprResult &RHS,
                                        SourceLocation Loc, bool IsCompAssign);
 
+  bool isValidSveBitcast(QualType srcType, QualType destType);
+
   bool areLaxCompatibleVectorTypes(QualType srcType, QualType destType);
   bool isLaxVectorConversion(QualType srcType, QualType destType);

diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 0bd240585bd7..671820afd485 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2219,6 +2219,12 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
   bool destIsVector = DestType->isVectorType();
   bool srcIsVector = SrcType->isVectorType();
   if (srcIsVector || destIsVector) {
+    // Allow bitcasting between SVE VLATs and VLSTs, and vice-versa.
+    if (Self.isValidSveBitcast(SrcType, DestType)) {
+      Kind = CK_BitCast;
+      return TC_Success;
+    }
+
     // The non-vector type, if any, must have integral type. This is
     // the same rule that C vector casts use; note, however, that enum
     // types are not integral in C++.
@@ -2752,6 +2758,13 @@ void CastOperation::CheckCStyleCast() {
     return;
   }
 
+  // Allow bitcasting between compatible SVE vector types.
+  if ((SrcType->isVectorType() || DestType->isVectorType()) &&
+      Self.isValidSveBitcast(SrcType, DestType)) {
+    Kind = CK_BitCast;
+    return;
+  }
+
   if (!DestType->isScalarType() && !DestType->isVectorType()) {
     const RecordType *DestRecordTy = DestType->getAs<RecordType>();

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index e1a87ede3bdd..7a8124fadfd7 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7197,6 +7197,28 @@ static bool breakDownVectorType(QualType type, uint64_t &len,
   return true;
 }
 
+/// Are the two types SVE-bitcast-compatible types? I.e. is bitcasting from the
+/// first SVE type (e.g. an SVE VLAT) to the second type (e.g. an SVE VLST)
+/// allowed?
+///
+/// This will also return false if the two given types do not make sense from
+/// the perspective of SVE bitcasts.
+bool Sema::isValidSveBitcast(QualType srcTy, QualType destTy) {
+  assert(srcTy->isVectorType() || destTy->isVectorType());
+
+  auto ValidScalableConversion = [](QualType FirstType, QualType SecondType) {
+    if (!FirstType->isSizelessBuiltinType())
+      return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+    return VecTy &&
+           VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector;
+  };
+
+  return ValidScalableConversion(srcTy, destTy) ||
+         ValidScalableConversion(destTy, srcTy);
+}
+
 /// Are the two types lax-compatible vector types? That is, given
 /// that one of them is a vector, do they have equal storage sizes,
 /// where the storage size is the number of elements times the element

diff --git a/clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c b/clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
new file mode 100644
index 000000000000..a93110db7cce
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu
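Concretely, after this patch code like the following sketch compiles even under -flax-vector-conversions=none, because the casts are explicit (the typedef and function names are assumed, not from the patch):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));

    fixed_float32_t to_fixed(svfloat32_t x) {
      return (fixed_float32_t)x; /* explicit VLAT -> VLST cast */
    }

    svfloat32_t to_scalable(fixed_float32_t x) {
      return (svfloat32_t)x; /* explicit VLST -> VLAT cast */
    }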
[clang] 3c696a2 - [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors
Author: Joe Ellis
Date: 2020-11-23T10:47:17Z
New Revision: 3c696a212ba4328e4f8f92136bc4d728a6490ef7

URL: https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7
DIFF: https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7.diff

LOG: [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors

Previously, lax conversions were only allowed between SVE vector-length
agnostic types and vector-length specific types. This meant that code
such as the following:

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    #define FIXED_ATTR __attribute__ ((vector_size (N/8)))
    typedef float fixed_float32_t FIXED_ATTR;

    void foo() {
      fixed_float32_t fs32;
      svfloat64_t s64;
      fs32 = s64;
    }

was not allowed.

This patch makes a minor change to areLaxCompatibleSveTypes to allow
for lax conversions to be performed between SVE vector-length agnostic
types and GNU vectors.

Differential Revision: https://reviews.llvm.org/D91696

Added: 

Modified: 
    clang/lib/AST/ASTContext.cpp
    clang/test/Sema/aarch64-sve-lax-vector-conversions.c
    clang/test/Sema/attr-arm-sve-vector-bits.c
    clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Removed: 

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index f54916babed7..67ee8c0956d6 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -8586,10 +8586,20 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
 
     const auto *VecTy = SecondType->getAs<VectorType>();
     if (VecTy &&
-        VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector) {
+        (VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector ||
+         VecTy->getVectorKind() == VectorType::GenericVector)) {
       const LangOptions::LaxVectorConversionKind LVCKind =
           getLangOpts().getLaxVectorConversions();
 
+      // If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
+      // "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
+      // converts to VLAT and VLAT implicitly converts to GNUT."
+      // ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
+      // predicates.
+      if (VecTy->getVectorKind() == VectorType::GenericVector &&
+          getTypeSize(SecondType) != getLangOpts().ArmSveVectorBits)
+        return false;
+
       // If -flax-vector-conversions=all is specified, the types are
       // certainly compatible.
       if (LVCKind == LangOptions::LaxVectorConversionKind::All)

diff --git a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
index e2fe87f7dd20..1a1addcf1c1b 100644
--- a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
+++ b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
@@ -7,32 +7,61 @@
 #include <arm_sve.h>
 
 #define N __ARM_FEATURE_SVE_BITS
-#define FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
+#define SVE_FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
 
-typedef svfloat32_t fixed_float32_t FIXED_ATTR;
-typedef svint32_t fixed_int32_t FIXED_ATTR;
+typedef svfloat32_t sve_fixed_float32_t SVE_FIXED_ATTR;
+typedef svint32_t sve_fixed_int32_t SVE_FIXED_ATTR;
+typedef float gnu_fixed_float32_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32_t GNU_FIXED_ATTR;
 
-void allowed_with_integer_lax_conversions() {
-  fixed_int32_t fi32;
+void sve_allowed_with_integer_lax_conversions() {
+  sve_fixed_int32_t fi32;
   svint64_t si64;
 
   // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
-  // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 'int' values) from incompatible type}}
+  // lax-vector-none-error@-1 {{assigning to 'sve_fixed_int32_t' (vector of 16 'int' values) from incompatible type}}
   si64 = fi32;
   // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') from incompatible type}}
 }
 
-void allowed_with_all_lax_conversions() {
-  fixed_float32_t ff32;
+void sve_allowed_with_all_lax_conversions() {
+  sve_fixed_float32_t ff32;
   svfloat64_t sf64;
 
   // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
   // -flax-vector-conversions=all.
   ff32 = sf64;
-  // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
-  // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-none-error@-1 {{assigning to 'sve_fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'sve_fixed_float32_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  //
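Note the size restriction the new check enforces: a GNU vector only participates in these lax conversions when its size in bits equals __ARM_FEATURE_SVE_BITS. An illustrative sketch of the rejected case (not from the patch):

    #include <arm_sve.h>

    /* A 128-bit GNU vector written with a hard-coded byte count. */
    typedef float gnu_float32x4_t __attribute__((vector_size(16)));

    void rejected_when_bits_differ(void) {
      gnu_float32x4_t g;
      svfloat32_t s;
      /* Assuming -msve-vector-bits=512: sizeof(g) * 8 != 512, so this
         assignment is rejected regardless of -flax-vector-conversions. */
      s = g;
    }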
[clang] 2ed7db0 - [InstSimplify] Remove redundant {insert,extract}_vector intrinsic chains
Author: Joe Ellis
Date: 2021-05-13T16:09:50Z
New Revision: 2ed7db0d206b6af2fffa4cb2704264b76ca61266

URL: https://github.com/llvm/llvm-project/commit/2ed7db0d206b6af2fffa4cb2704264b76ca61266
DIFF: https://github.com/llvm/llvm-project/commit/2ed7db0d206b6af2fffa4cb2704264b76ca61266.diff

LOG: [InstSimplify] Remove redundant {insert,extract}_vector intrinsic chains

This commit removes some redundant {insert,extract}_vector intrinsic
chains by implementing the following patterns as instsimplifies:

    (insert_vector _, (extract_vector X, 0), 0) -> X
    (extract_vector (insert_vector _, X, 0), 0) -> X

Reviewed By: peterwaller-arm

Differential Revision: https://reviews.llvm.org/D101986

Added: 
    llvm/test/Transforms/InstSimplify/extract-vector.ll
    llvm/test/Transforms/InstSimplify/insert-vector.ll

Modified: 
    clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
    llvm/lib/Analysis/InstructionSimplify.cpp

Removed: 

diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
index f988d54bacd4f..edc307745a2aa 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -24,11 +24,7 @@ svint32_t sizeless_callee(svint32_t x) {
 
 // CHECK-LABEL: @fixed_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[X_COERCE:%.*]]
 //
 fixed_int32_t fixed_caller(fixed_int32_t x) {
   return sizeless_callee(x);
@@ -36,9 +32,7 @@ fixed_int32_t fixed_caller(fixed_int32_t x) {
 
 // CHECK-LABEL: @fixed_callee(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[X_COERCE:%.*]]
 //
 fixed_int32_t fixed_callee(fixed_int32_t x) {
   return x;
@@ -47,12 +41,9 @@ fixed_int32_t fixed_callee(fixed_int32_t x) {
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[COERCE1:%.*]] = alloca <16 x i32>, align 16
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X:%.*]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
-// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 4 x i32> @fixed_callee(<vscale x 4 x i32> [[CASTSCALABLESVE]])
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i32>* [[COERCE1]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    store <vscale x 4 x i32> [[CALL]], <vscale x 4 x i32>* [[TMP0]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32>* [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP1]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE2]]
 //
@@ -66,15 +57,9 @@ svint32_t sizeless_caller(svint32_t x) {
 
 // CHECK-LABEL: @call_int32_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[OP2:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP2_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP1]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP2]], i64 0)
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[CASTSCALABLESVE2]])
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
-// CH
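The clang test changes above come from C like the following sketch, whose VLST/VLAT round-trips previously produced the extract/insert chains that now fold away (this mirrors the functions in attr-arm-sve-vector-bits-call.c):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    svint32_t sizeless_callee(svint32_t x) { return x; }

    /* Each VLST <-> VLAT conversion used to emit an extract/insert pair;
       with the new instsimplifies the whole body folds to a plain
       `ret <vscale x 4 x i32> %x.coerce`. */
    fixed_int32_t fixed_caller(fixed_int32_t x) {
      return sizeless_callee(x);
    }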
[clang] 1f2122c - [clang][SVE] Use __inline__ instead of inline in arm_sve.h
Author: Joe Ellis
Date: 2021-02-18T17:09:46Z
New Revision: 1f2122c9b046a7d6d141f7c42528d8a3e2e66b70

URL: https://github.com/llvm/llvm-project/commit/1f2122c9b046a7d6d141f7c42528d8a3e2e66b70
DIFF: https://github.com/llvm/llvm-project/commit/1f2122c9b046a7d6d141f7c42528d8a3e2e66b70.diff

LOG: [clang][SVE] Use __inline__ instead of inline in arm_sve.h

The inline keyword is not defined in the C89 standard, so source files
that include arm_sve.h will fail compilation if -std=c89 is specified.
For consistency with arm_neon.h, we should use __inline__ instead.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D96852

Added: 

Modified: 
    clang/utils/TableGen/SveEmitter.cpp

Removed: 

diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 0e69600ef861..fe369e9d9408 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1201,7 +1201,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
   OS << "};\n\n";
 
   OS << "/* Function attributes */\n";
-  OS << "#define __aio static inline __attribute__((__always_inline__, "
+  OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
         "__nodebug__, __overloadable__))\n\n";
 
   // Add reinterpret functions.
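The failure mode being fixed is easy to reproduce; a sketch, assuming an SVE-enabled compile in strict C89 mode (the function name is illustrative):

    /* clang -std=c89 -march=armv8-a+sve -c example.c
       Before this patch, arm_sve.h itself used `inline`, which is not a
       keyword in C89, so the #include failed to parse. `__inline__` is a
       GNU extension accepted in every language mode. */
    #include <arm_sve.h>

    svint32_t negate_active(svbool_t pg, svint32_t x) {
      return svneg_z(pg, x);
    }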
[clang] 1b1b30c - [clang][SVE] Don't warn on vector to sizeless builtin implicit conversion
Author: Joe Ellis
Date: 2021-02-23T13:40:58Z
New Revision: 1b1b30cf0f7d9619afb32e16f4a7c007da4ffccf

URL: https://github.com/llvm/llvm-project/commit/1b1b30cf0f7d9619afb32e16f4a7c007da4ffccf
DIFF: https://github.com/llvm/llvm-project/commit/1b1b30cf0f7d9619afb32e16f4a7c007da4ffccf.diff

LOG: [clang][SVE] Don't warn on vector to sizeless builtin implicit conversion

This commit prevents warnings from -Wconversion when a clang vector
type is implicitly converted to a sizeless builtin type -- for example,
when implicitly converting a fixed-predicate to a scalable predicate.

The code below:

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    #define FIXED_ATTR __attribute__((arm_sve_vector_bits (N)))
    typedef svbool_t fixed_svbool_t FIXED_ATTR;

    inline fixed_svbool_t foo(fixed_svbool_t p) {
      return svnot_z(svptrue_b64(), p);
    }

would previously raise this warning:

    warning: implicit conversion turns vector to scalar: \
    'fixed_svbool_t' (vector of 8 'unsigned char' values) to 'svbool_t' \
    (aka '__SVBool_t') [-Wconversion]

Note that many cases of these implicit conversions were already
permitted because many functions inside arm_sve.h are spawned via
preprocessor macros, and the call to isInSystemMacro would cover us in
this case. This commit fixes the remaining cases.

Differential Revision: https://reviews.llvm.org/D97053

Added: 

Modified: 
    clang/lib/Sema/SemaChecking.cpp
    clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Removed: 

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2d3d36f4adad..b41d94361d50 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12051,7 +12051,16 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
       checkObjCDictionaryLiteral(S, QualType(Target, 0), DictionaryLiteral);
 
   // Strip vector types.
-  if (isa<VectorType>(Source)) {
+  if (const auto *SourceVT = dyn_cast<VectorType>(Source)) {
+    if (Target->isVLSTBuiltinType()) {
+      auto SourceVectorKind = SourceVT->getVectorKind();
+      if (SourceVectorKind == VectorType::SveFixedLengthDataVector ||
+          SourceVectorKind == VectorType::SveFixedLengthPredicateVector ||
+          (SourceVectorKind == VectorType::GenericVector &&
+           S.Context.getTypeSize(Source) == S.getLangOpts().ArmSveVectorBits))
+        return;
+    }
+
     if (!isa<VectorType>(Target)) {
       if (S.SourceMgr.isInSystemMacro(CC))
         return;

diff --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
index 5e796b7c8995..0437a264f65b 100644
--- a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include <arm_sve.h>
[clang] 3d5b18a - [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments
Author: Joe Ellis
Date: 2021-01-05T15:18:21Z
New Revision: 3d5b18a3fdf47ae2286642131e4a92968dd01c2a

URL: https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a
DIFF: https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments

VLST arguments are coerced to VLATs at the function boundary for
consistency with the VLAT ABI. They are then bitcast back to VLSTs in
the function prolog. Previously, this conversion was done through
memory. With the introduction of the llvm.vector.{insert,extract}
intrinsics, we can avoid going through memory here.

Depends on D92761

Differential Revision: https://reviews.llvm.org/D92762

Added: 

Modified: 
    clang/lib/CodeGen/CGCall.cpp
    clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
    clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
    clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Removed: 

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e28736bd3d2f..f1987408165b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2688,6 +2688,27 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         break;
       }
 
+      // VLST arguments are coerced to VLATs at the function boundary for
+      // ABI consistency. If this is a VLST that was coerced to
+      // a VLAT at the function boundary and the types match up, use
+      // llvm.experimental.vector.extract to convert back to the original
+      // VLST.
+      if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
+        auto *Coerced = Fn->getArg(FirstIRArg);
+        if (auto *VecTyFrom =
+                dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
+          if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
+            llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+
+            assert(NumIRArgs == 1);
+            Coerced->setName(Arg->getName() + ".coerce");
+            ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
+                VecTyTo, Coerced, Zero, "castFixedSve")));
+            break;
+          }
+        }
+      }
+
       Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
                                      Arg->getName());

diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index 9b733d21dbbb..9d3206a75f0e 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -59,14 +59,14 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK-LABEL: define{{.*}} void @f2(
 // CHECK-SAME:    <[[#div(VBITS,8)]] x i8>* noalias nocapture sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, <[[#div(VBITS,8)]] x i8>* nocapture readonly %0)
-// CHECK-NEXT: entry:
-// CHECK-NEXT:   [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT:   [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT:   [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
-// CHECK-NEXT:   [[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> @llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
-// CHECK-NEXT:   store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], <[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
-// CHECK-NEXT:   ret void
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> @llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], <[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
+// CHECK-NEXT:    ret void
 vec_int8 f2(vec_int8 x) { return svasrd_x(svptrue_b8(), x, 1); }
 #endif
 
@@ -78,24 +78,18 @@ void f3(vec1);
 typedef svint8_t vec2 __attrib
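At the source level the affected pattern is any function that takes a VLST parameter; a sketch (the typedef and function name are assumed, not from the patch):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    /* `x` arrives as a VLAT (<vscale x 4 x i32>) per the ABI. After this
       patch the prolog recovers the fixed-length value with a single
       llvm.experimental.vector.extract call instead of spilling the
       coerced argument to the stack and reloading it. */
    fixed_int32_t passthrough(fixed_int32_t x) { return x; }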
[clang] 8ea72b3 - [clang][AArch64][SVE] Avoid going through memory for coerced VLST return values
Author: Joe Ellis
Date: 2021-01-11T12:10:59Z
New Revision: 8ea72b388734ce660f861e0dfbe53d203e94876a

URL: https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a
DIFF: https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST return values

VLST return values are coerced to VLATs in the function epilog for
consistency with the VLAT ABI. Previously, this coercion was done
through memory. It is preferable to use the
llvm.experimental.vector.insert intrinsic to avoid going through memory
here.

Reviewed By: c-rhodes

Differential Revision: https://reviews.llvm.org/D94290

Added: 

Modified: 
    clang/lib/CodeGen/CGCall.cpp
    clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
    clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c

Removed: 

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index f1987408165b..2cc7203d1194 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1265,6 +1265,21 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
     return CGF.Builder.CreateLoad(Src);
   }
 
+  // If coercing a fixed vector to a scalable vector for ABI compatibility, and
+  // the types match, use the llvm.experimental.vector.insert intrinsic to
+  // perform the conversion.
+  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+        auto *Load = CGF.Builder.CreateLoad(Src);
+        auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+        auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+        return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero,
+                                              "castScalableSve");
+      }
+    }
+  }
+
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp =
       CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(), Src.getName());

diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 6fafc2ca2db9..a808d50884ea 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -48,14 +48,11 @@ void test02() {
 // CHECK-SAME:    [[#VBITS]]
 // CHECK-SAME:    EES_(<vscale x 4 x i32> %x.coerce, <vscale x 4 x i32> %y.coerce)
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[X:%.*]] = call <[[#div(VBITS, 32)]] x i32> @llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[Y:%.*]] = call <[[#div(VBITS, 32)]] x i32> @llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE1:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <[[#div(VBITS, 32)]] x i32> [[Y]], [[X]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <[[#div(VBITS, 32)]] x i32>*
-// CHECK-NEXT:    store <[[#div(VBITS, 32)]] x i32> [[ADD]], <[[#div(VBITS, 32)]] x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32(<vscale x 4 x i32> undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));
 auto f(vec x, vec y) { return x + y; } // Returns a vec.
 #endif

diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
index f909b7164622..f988d54bacd4 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -24,14 +24,11 @@ svint32_t sizeless_callee(svint32_t x) {
 
 // CHECK-LABEL: @fixed_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// C
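This is the return-value counterpart of D92762. A sketch of the affected pattern, mirroring the `f` function in the test above (the typedef and function name are assumed):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    /* The fixed-length sum is widened to the <vscale x 4 x i32> ABI
       return type in the epilog with llvm.experimental.vector.insert,
       rather than being stored to a temporary alloca and reloaded as a
       scalable vector. */
    fixed_int32_t add(fixed_int32_t a, fixed_int32_t b) { return a + b; }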