[clang] 5a2a836 - [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 intrinsics

2020-12-15 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2020-12-15T17:19:16Z
New Revision: 5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf

URL: 
https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf
DIFF: 
https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf.diff

LOG: [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 
intrinsics

Prior to this patch, Clang supported the following C/C++ intrinsics:

vceqz_p16
vceqzq_p16
vmlaq_n_f64
vmlsq_n_f64

... exposed through arm_neon.h. However, these intrinsics are not part
of the ACLE, so exposing them allowed developers to write code that is
not portable to other toolchains.

This patch removes these intrinsics.

There is a bug report capturing this issue here:

https://bugs.llvm.org/show_bug.cgi?id=47471
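
For code that used the removed intrinsics, the same results can be obtained
with documented ACLE intrinsics. A minimal sketch (the helper names here are
illustrative, not part of the patch):

    #include <arm_neon.h>

    /* Previously vmlaq_n_f64(a, b, c): multiply b by the scalar c, add to a.
       vmulq_n_f64 and vaddq_f64 reproduce the old fmul+fadd expansion;
       vfmaq_n_f64 is the fused alternative (different rounding behaviour). */
    float64x2_t mla_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
      return vaddq_f64(a, vmulq_n_f64(b, c));
    }

    /* Previously vceqzq_p16(a): compare poly16 lanes against zero.
       Reinterpreting to u16 and using the documented vceqzq_u16 is equivalent. */
    uint16x8_t ceqz_p16(poly16x8_t a) {
      return vceqzq_u16(vreinterpretq_u16_p16(a));
    }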

Reviewed By: bsmith

Differential Revision: https://reviews.llvm.org/D93206

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td
clang/test/CodeGen/aarch64-neon-fma.c
clang/test/CodeGen/aarch64-neon-misc.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index 4d4e42dd514b..6f1380d58c16 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -786,9 +786,6 @@ def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>;
 def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>;
 def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>;
 
-def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>;
-def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>;
-
 

 // Logical operations
 def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">;
@@ -868,7 +865,7 @@ def CFMGT  : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
 def CFMLT  : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
 
 def CMEQ  : SInst<"vceqz", "U.",
-  "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">;
+  "csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
 def CMGE  : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
 def CMLE  : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
 def CMGT  : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;

diff  --git a/clang/test/CodeGen/aarch64-neon-fma.c 
b/clang/test/CodeGen/aarch64-neon-fma.c
index c2dd315ed9fc..0726218d2e15 100644
--- a/clang/test/CodeGen/aarch64-neon-fma.c
+++ b/clang/test/CodeGen/aarch64-neon-fma.c
@@ -26,16 +26,6 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, 
float32_t c) {
   return vmlaq_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x 
double> %b, double %c) #1 {
-// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, 
i32 0
-// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], 
double %c, i32 1
-// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
-// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
-// CHECK:   ret <2 x double> [[ADD_I]]
-float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
-  return vmlaq_n_f64(a, b, c);
-}
-
 // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x 
float> %b, float %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float %c, i32 1
@@ -58,16 +48,6 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, 
float32_t c) {
   return vmls_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x 
double> %b, double %c) #1 {
-// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, 
i32 0
-// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], 
double %c, i32 1
-// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
-// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
-// CHECK:   ret <2 x double> [[SUB_I]]
-float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
-  return vmlsq_n_f64(a, b, c);
-}
-
 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x 
float> %b, <2 x float> %v) #0 {
 // CHECK:[[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
 // CHECK:[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>

diff  --git a/clang/test/CodeGen/aarch64-neon-misc.c 
b/clang/test/CodeGen/aarch64-neon-misc.c
index 88020a1a69c2..5517fe3dc411 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -198,24 +198,6 @@ uint8x16_t test_vceqzq_p8(poly8x16_t a) {
   return vceqzq_p8(a);
 }
 
-// CHECK-LABEL: @test_vceqz_p16(
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4

[clang] dad07ba - [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts

2020-12-16 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2020-12-16T12:24:32Z
New Revision: dad07baf123e672b1d5d5e7c21e73b92399d5a0c

URL: 
https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c
DIFF: 
https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts

This change makes use of the llvm.experimental.vector.{insert,extract}
intrinsics to avoid going through memory when performing bitcasts between
vector-length agnostic types and vector-length specific types.

Depends on D91362
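
A small C example of the kind of VLAT <-> VLST bitcast this affects (roughly
what the attr-arm-sve-vector-bits-cast.c tests exercise; assumes
-msve-vector-bits is set so that __ARM_FEATURE_SVE_BITS is defined, and the
names are illustrative):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    /* VLST -> VLAT: now emitted as llvm.experimental.vector.insert. */
    svint32_t to_scalable(fixed_int32_t x) { return x; }

    /* VLAT -> VLST: now emitted as llvm.experimental.vector.extract. */
    fixed_int32_t to_fixed(svint32_t x) { return x; }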

Reviewed By: c-rhodes

Differential Revision: https://reviews.llvm.org/D92761

Added: 


Modified: 
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
llvm/include/llvm/IR/IRBuilder.h

Removed: 




diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 973cefd831e6..c9cf1d0dfd89 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1996,7 +1996,39 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
   }
 }
 
+// If Src is a fixed vector and Dst is a scalable vector, and both have the
+// same element type, use the llvm.experimental.vector.insert intrinsic to
+// perform the bitcast.
+    if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
+          llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+          llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+          return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero,
+                                            "castScalableSve");
+        }
+      }
+    }
+
+// If Src is a scalable vector and Dst is a fixed vector, and both have the
+// same element type, use the llvm.experimental.vector.extract intrinsic to
+// perform the bitcast.
+    if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+          llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+          return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve");
+        }
+      }
+    }
+
 // Perform VLAT <-> VLST bitcast through memory.
+// TODO: since the llvm.experimental.vector.{insert,extract} intrinsics
+//   require the element types of the vectors to be the same, we
+//   need to keep this around for casting between predicates, or more
+//   generally for bitcasts between VLAT <-> VLST where the element
+//   types of the vectors are not the same, until we figure out a 
better
+//   way of doing these casts.
     if ((isa<llvm::FixedVectorType>(SrcTy) &&
          isa<llvm::ScalableVectorType>(DstTy)) ||
         (isa<llvm::ScalableVectorType>(SrcTy) &&

diff  --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index fed7708c6893..beba6a3f0199 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -51,34 +51,22 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 
2 , 1, 0,
 typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK128-LABEL: define <16 x i8> @f2(<16 x i8> %x)
 // CHECK128-NEXT:  entry:
-// CHECK128-NEXT:%x.addr = alloca <16 x i8>, align 16
-// CHECK128-NEXT:%saved-call-rvalue = alloca , align 16
-// CHECK128-NEXT:store <16 x i8> %x, <16 x i8>* %x.addr, align 16
-// CHECK128-NEXT:%0 = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK128-NEXT:%1 = bitcast <16 x i8>* %x.addr to *
-// CHECK128-NEXT:%2 = load , * %1, 
align 16
-// CHECK128-NEXT:%3 = call  
@llvm.aarch64.sve.asrd.nxv16i8( %0,  %2, 
i32 1)
-// CHECK128-NEXT:store  %3, * 
%saved-call-rvalue, align 16
-// CHECK128-NEXT:%castFixedSve = bitcast * 
%saved-call-rvalue to <16 x i8>*
-// CHECK128-NEXT:%4 = load <16 x i8>, <16 x i8>* %castFixedSve, align 16
-// CHECK128-NEXT:ret <16 x i8> %4
+// CHECK128-NEXT:[[TMP0:%.*]] = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK128-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv16i8.v16i8( undef, <16 x 
i8> [[X:%.*]], i64 0)
+// CHECK128-NEXT:[[TMP1:%.*]] = call  
@llvm.aarch64.sve.asrd.nxv16i8( [[TMP0]],  
[[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT:

[clang] 23a96b8 - [AArch64][SVE] Support implicit lax vector conversions for SVE types

2020-11-17 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2020-11-17T14:50:17Z
New Revision: 23a96b84a8d985b686a4e06dec1f7aebc0cca6c6

URL: 
https://github.com/llvm/llvm-project/commit/23a96b84a8d985b686a4e06dec1f7aebc0cca6c6
DIFF: 
https://github.com/llvm/llvm-project/commit/23a96b84a8d985b686a4e06dec1f7aebc0cca6c6.diff

LOG: [AArch64][SVE] Support implicit lax vector conversions for SVE types

Lax vector conversions were behaving incorrectly for implicit casts
between scalable and fixed-length vector types. For example, this:

#include <arm_sve.h>

#define N __ARM_FEATURE_SVE_BITS
#define FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))

typedef svfloat32_t fixed_float32_t FIXED_ATTR;

void allowed_depending() {
  fixed_float32_t fs32;
  svfloat64_t s64;

  fs32 = s64;
}

... would fail because the vectors have differing lane sizes. This patch
implements the correct behaviour for
-flax-vector-conversions={none,all,integer}. Specifically:

- -flax-vector-conversions=none prevents all lax vector conversions
  between scalable and fixed-sized vectors.
- -flax-vector-conversions=integer allows lax vector conversions between
  scalable and fixed-size vectors whose element types are integers.
- -flax-vector-conversions=all allows all lax vector conversions between
  scalable and fixed-size vectors (including those with floating point
  element types).

The implicit conversions are implemented as bitcasts.
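
A short sketch of how the three modes behave for such implicit conversions
(assumes -msve-vector-bits is set so __ARM_FEATURE_SVE_BITS is defined; the
typedef and function names are illustrative):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
    typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));

    void lax_examples(svint64_t si64, svfloat64_t sf64) {
      fixed_int32_t fi32;
      fixed_float32_t ff32;
      fi32 = si64; /* rejected with =none; accepted with =integer and =all */
      ff32 = sf64; /* rejected with =none and =integer; accepted with =all */
    }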

Reviewed By: fpetrogalli

Differential Revision: https://reviews.llvm.org/D91067

Added: 
clang/test/Sema/aarch64-sve-lax-vector-conversions.c
clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Modified: 
clang/include/clang/AST/ASTContext.h
clang/lib/AST/ASTContext.cpp
clang/lib/Sema/SemaExpr.cpp
clang/lib/Sema/SemaOverload.cpp
clang/test/Sema/attr-arm-sve-vector-bits.c

Removed: 




diff  --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 9fc9c924b51c..8c0930237583 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2088,6 +2088,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// vector-length.
   bool areCompatibleSveTypes(QualType FirstType, QualType SecondType);
 
+  /// Return true if the given vector types are lax-compatible SVE vector 
types,
+  /// false otherwise.
+  bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType);
+
   /// Return true if the type has been explicitly qualified with ObjC 
ownership.
   /// A type may be implicitly qualified with ownership under ObjC ARC, and in
   /// some cases the compiler treats these differently.

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index d63f299f021f..836065291fea 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -8574,6 +8574,41 @@ bool ASTContext::areCompatibleSveTypes(QualType 
FirstType,
  IsValidCast(SecondType, FirstType);
 }
 
+bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
+  QualType SecondType) {
+  assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
+  (FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) 
&&
+ "Expected SVE builtin type and vector type!");
+
+  auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
+    if (!FirstType->getAs<BuiltinType>())
+  return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+if (VecTy &&
+VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector) {
+  const LangOptions::LaxVectorConversionKind LVCKind =
+  getLangOpts().getLaxVectorConversions();
+
+  // If -flax-vector-conversions=all is specified, the types are
+  // certainly compatible.
+  if (LVCKind == LangOptions::LaxVectorConversionKind::All)
+return true;
+
+  // If -flax-vector-conversions=integer is specified, the types are
+  // compatible if the elements are integer types.
+  if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
+return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
+   FirstType->getSveEltType(*this)->isIntegerType();
+}
+
+return false;
+  };
+
+  return IsLaxCompatible(FirstType, SecondType) ||
+ IsLaxCompatible(SecondType, FirstType);
+}
+
 bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
   while (true) {
 // __strong id

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0a25720c0f7b..e1a87ede3bdd 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -9003,12 +9003,13 @@ Sema::CheckAssignmentConstraints(QualType LHSType, 
ExprResult &RHS,
 }
 
 // Allow assignments between fixed-length and sizeless SVE vectors.
-if (((LHSType->isSizelessBuiltinType() && RHSType->isVectorType()) ||
- (LHSType-

[clang] 1d7abcf - [AArch64][SVE] Add tests for VLST -> VLAT lax conversions

2020-11-17 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2020-11-17T18:03:42Z
New Revision: 1d7abcf99e3d8f9ac7e24be5758da7cfef656400

URL: 
https://github.com/llvm/llvm-project/commit/1d7abcf99e3d8f9ac7e24be5758da7cfef656400
DIFF: 
https://github.com/llvm/llvm-project/commit/1d7abcf99e3d8f9ac7e24be5758da7cfef656400.diff

LOG: [AArch64][SVE] Add tests for VLST -> VLAT lax conversions

These were previously missing from the SVE lax conversions tests
introduced in this commit:

23a96b84a8d985b686a4e06dec1f7aebc0cca6c6
(https://reviews.llvm.org/D91067)

Differential Revision: https://reviews.llvm.org/D91642

Added: 


Modified: 
clang/test/Sema/aarch64-sve-lax-vector-conversions.c
clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Removed: 




diff  --git a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c 
b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
index 68393275e54f..e2fe87f7dd20 100644
--- a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
+++ b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
@@ -20,6 +20,8 @@ void allowed_with_integer_lax_conversions() {
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 
'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') 
from incompatible type}}
 }
 
 void allowed_with_all_lax_conversions() {
@@ -31,4 +33,7 @@ void allowed_with_all_lax_conversions() {
   ff32 = sf64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 
'float' values) from incompatible type}}
   // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 
16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'svfloat64_t' (aka 
'__SVFloat64_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'svfloat64_t' (aka 
'__SVFloat64_t') from incompatible type}}
 }

diff  --git a/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp 
b/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
index 68393275e54f..e2fe87f7dd20 100644
--- a/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ b/clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -20,6 +20,8 @@ void allowed_with_integer_lax_conversions() {
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 
'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') 
from incompatible type}}
 }
 
 void allowed_with_all_lax_conversions() {
@@ -31,4 +33,7 @@ void allowed_with_all_lax_conversions() {
   ff32 = sf64;
   // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 
'float' values) from incompatible type}}
   // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 
16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'svfloat64_t' (aka 
'__SVFloat64_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'svfloat64_t' (aka 
'__SVFloat64_t') from incompatible type}}
 }





[clang] 1e2da38 - [AArch64][SVE] Allow C-style casts between fixed-size and scalable vectors

2020-11-19 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2020-11-19T11:18:35Z
New Revision: 1e2da3839cc3543629ecb847fd3aa34edb64b42a

URL: 
https://github.com/llvm/llvm-project/commit/1e2da3839cc3543629ecb847fd3aa34edb64b42a
DIFF: 
https://github.com/llvm/llvm-project/commit/1e2da3839cc3543629ecb847fd3aa34edb64b42a.diff

LOG: [AArch64][SVE] Allow C-style casts between fixed-size and scalable vectors

This patch allows C-style casting between fixed-size and scalable
vectors. This kind of cast was previously blocked by the compiler, but
it should be allowed.
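
For instance, with __ARM_FEATURE_SVE_BITS defined, C-style casts in both
directions are now accepted (a sketch along the lines of the added
aarch64-sve-explicit-casts-fixed-size.c test; the names are illustrative):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));

    fixed_float32_t to_fixed(svfloat32_t x) { return (fixed_float32_t)x; }
    svfloat32_t to_scalable(fixed_float32_t x) { return (svfloat32_t)x; }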

Differential Revision: https://reviews.llvm.org/D91262

Added: 
clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp

Modified: 
clang/include/clang/Sema/Sema.h
clang/lib/Sema/SemaCast.cpp
clang/lib/Sema/SemaExpr.cpp

Removed: 




diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index fb95b8cbd193..20c7e8c2ed1b 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11501,6 +11501,8 @@ class Sema final {
   QualType CheckMatrixMultiplyOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, bool IsCompAssign);
 
+  bool isValidSveBitcast(QualType srcType, QualType destType);
+
   bool areLaxCompatibleVectorTypes(QualType srcType, QualType destType);
   bool isLaxVectorConversion(QualType srcType, QualType destType);
 

diff  --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 0bd240585bd7..671820afd485 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2219,6 +2219,12 @@ static TryCastResult TryReinterpretCast(Sema &Self, 
ExprResult &SrcExpr,
   bool destIsVector = DestType->isVectorType();
   bool srcIsVector = SrcType->isVectorType();
   if (srcIsVector || destIsVector) {
+// Allow bitcasting between SVE VLATs and VLSTs, and vice-versa.
+if (Self.isValidSveBitcast(SrcType, DestType)) {
+  Kind = CK_BitCast;
+  return TC_Success;
+}
+
 // The non-vector type, if any, must have integral type.  This is
 // the same rule that C vector casts use; note, however, that enum
 // types are not integral in C++.
@@ -2752,6 +2758,13 @@ void CastOperation::CheckCStyleCast() {
 return;
   }
 
+  // Allow bitcasting between compatible SVE vector types.
+  if ((SrcType->isVectorType() || DestType->isVectorType()) &&
+  Self.isValidSveBitcast(SrcType, DestType)) {
+Kind = CK_BitCast;
+return;
+  }
+
   if (!DestType->isScalarType() && !DestType->isVectorType()) {
     const RecordType *DestRecordTy = DestType->getAs<RecordType>();
 

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index e1a87ede3bdd..7a8124fadfd7 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7197,6 +7197,28 @@ static bool breakDownVectorType(QualType type, uint64_t 
&len,
   return true;
 }
 
+/// Are the two types SVE-bitcast-compatible types? I.e. is bitcasting from the
+/// first SVE type (e.g. an SVE VLAT) to the second type (e.g. an SVE VLST)
+/// allowed?
+///
+/// This will also return false if the two given types do not make sense from
+/// the perspective of SVE bitcasts.
+bool Sema::isValidSveBitcast(QualType srcTy, QualType destTy) {
+  assert(srcTy->isVectorType() || destTy->isVectorType());
+
+  auto ValidScalableConversion = [](QualType FirstType, QualType SecondType) {
+if (!FirstType->isSizelessBuiltinType())
+  return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+return VecTy &&
+   VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector;
+  };
+
+  return ValidScalableConversion(srcTy, destTy) ||
+ ValidScalableConversion(destTy, srcTy);
+}
+
 /// Are the two types lax-compatible vector types?  That is, given
 /// that one of them is a vector, do they have equal storage sizes,
 /// where the storage size is the number of elements times the element

diff  --git a/clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c 
b/clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
new file mode 100644
index ..a93110db7cce
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=128 -flax-vector-conversions=none 
-fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=256 -flax-vector-conversions=none 
-fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=512 -flax-vector-conversions=none 
-fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 

[clang] 3c696a2 - [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors

2020-11-23 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2020-11-23T10:47:17Z
New Revision: 3c696a212ba4328e4f8f92136bc4d728a6490ef7

URL: 
https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7
DIFF: 
https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7.diff

LOG: [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors

Previously, lax conversions were only allowed between SVE vector-length
agnostic types and vector-length specific types. This meant that code
such as the following:

#include <arm_sve.h>
#define N __ARM_FEATURE_SVE_BITS
#define FIXED_ATTR __attribute__ ((vector_size (N/8)))
typedef float fixed_float32_t FIXED_ATTR;

void foo() {
fixed_float32_t fs32;
svfloat64_t s64;
fs32 = s64;
}

was not allowed.

This patch makes a minor change to areLaxCompatibleSveTypes to allow for
lax conversions to be performed between SVE vector-length agnostic types
and GNU vectors.
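
Note that, as the diff below spells out, the GNU vector's total size must
equal __ARM_FEATURE_SVE_BITS for the lax conversion to be considered, and the
usual -flax-vector-conversions rules still apply. A sketch (names are
illustrative):

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef float gnu_float32_t __attribute__((vector_size(N / 8))); /* N bits wide */

    void f(svfloat64_t s64) {
      gnu_float32_t g;
      g = s64; /* accepted with -flax-vector-conversions=all; a GNU vector
                  whose size differs from N bits is still rejected */
    }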

Differential Revision: https://reviews.llvm.org/D91696

Added: 


Modified: 
clang/lib/AST/ASTContext.cpp
clang/test/Sema/aarch64-sve-lax-vector-conversions.c
clang/test/Sema/attr-arm-sve-vector-bits.c
clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Removed: 




diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index f54916babed7..67ee8c0956d6 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -8586,10 +8586,20 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType 
FirstType,
 
     const auto *VecTy = SecondType->getAs<VectorType>();
 if (VecTy &&
-VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector) {
+(VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector ||
+ VecTy->getVectorKind() == VectorType::GenericVector)) {
   const LangOptions::LaxVectorConversionKind LVCKind =
   getLangOpts().getLaxVectorConversions();
 
+  // If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
+  // "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
+  // converts to VLAT and VLAT implicitly converts to GNUT."
+  // ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
+  // predicates.
+  if (VecTy->getVectorKind() == VectorType::GenericVector &&
+  getTypeSize(SecondType) != getLangOpts().ArmSveVectorBits)
+return false;
+
   // If -flax-vector-conversions=all is specified, the types are
   // certainly compatible.
   if (LVCKind == LangOptions::LaxVectorConversionKind::All)

diff  --git a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c 
b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
index e2fe87f7dd20..1a1addcf1c1b 100644
--- a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
+++ b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
@@ -7,32 +7,61 @@
 #include <arm_sve.h>
 
 #define N __ARM_FEATURE_SVE_BITS
-#define FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
+#define SVE_FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
 
-typedef svfloat32_t fixed_float32_t FIXED_ATTR;
-typedef svint32_t fixed_int32_t FIXED_ATTR;
+typedef svfloat32_t sve_fixed_float32_t SVE_FIXED_ATTR;
+typedef svint32_t sve_fixed_int32_t SVE_FIXED_ATTR;
+typedef float gnu_fixed_float32_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32_t GNU_FIXED_ATTR;
 
-void allowed_with_integer_lax_conversions() {
-  fixed_int32_t fi32;
+void sve_allowed_with_integer_lax_conversions() {
+  sve_fixed_int32_t fi32;
   svint64_t si64;
 
   // The implicit cast here should fail if -flax-vector-conversions=none, but 
pass if
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
-  // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 
'int' values) from incompatible type}}
+  // lax-vector-none-error@-1 {{assigning to 'sve_fixed_int32_t' (vector of 16 
'int' values) from incompatible type}}
   si64 = fi32;
   // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') 
from incompatible type}}
 }
 
-void allowed_with_all_lax_conversions() {
-  fixed_float32_t ff32;
+void sve_allowed_with_all_lax_conversions() {
+  sve_fixed_float32_t ff32;
   svfloat64_t sf64;
 
   // The implicit cast here should fail if 
-flax-vector-conversions={none,integer}, but pass if
   // -flax-vector-conversions=all.
   ff32 = sf64;
-  // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 
'float' values) from incompatible type}}
-  // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 
16 'float' values) from incompatible type}}
+  // lax-vector-none-error@-1 {{assigning to 'sve_fixed_float32_t' (vector of 
16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'sve_fixed_float32_t' (vector 
of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  //

[clang] 2ed7db0 - [InstSimplify] Remove redundant {insert,extract}_vector intrinsic chains

2021-05-13 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2021-05-13T16:09:50Z
New Revision: 2ed7db0d206b6af2fffa4cb2704264b76ca61266

URL: 
https://github.com/llvm/llvm-project/commit/2ed7db0d206b6af2fffa4cb2704264b76ca61266
DIFF: 
https://github.com/llvm/llvm-project/commit/2ed7db0d206b6af2fffa4cb2704264b76ca61266.diff

LOG: [InstSimplify] Remove redundant {insert,extract}_vector intrinsic chains

This commit removes some redundant {insert,extract}_vector intrinsic
chains by implementing the following patterns as instsimplifies:

   (insert_vector _, (extract_vector X, 0), 0) -> X
   (extract_vector (insert_vector _, X, 0), 0) -> X

Reviewed By: peterwaller-arm

Differential Revision: https://reviews.llvm.org/D101986

Added: 
llvm/test/Transforms/InstSimplify/extract-vector.ll
llvm/test/Transforms/InstSimplify/insert-vector.ll

Modified: 
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
llvm/lib/Analysis/InstructionSimplify.cpp

Removed: 




diff  --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c 
b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
index f988d54bacd4f..edc307745a2aa 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -24,11 +24,7 @@ svint32_t sizeless_callee(svint32_t x) {
 
 // CHECK-LABEL: @fixed_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[X:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[X]], i64 0)
-// CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE1:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[CASTFIXEDSVE]], i64 0)
-// CHECK-NEXT:ret  [[CASTSCALABLESVE1]]
+// CHECK-NEXT:ret  [[X_COERCE:%.*]]
 //
 fixed_int32_t fixed_caller(fixed_int32_t x) {
   return sizeless_callee(x);
@@ -36,9 +32,7 @@ fixed_int32_t fixed_caller(fixed_int32_t x) {
 
 // CHECK-LABEL: @fixed_callee(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[X:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[X]], i64 0)
-// CHECK-NEXT:ret  [[CASTSCALABLESVE]]
+// CHECK-NEXT:ret  [[X_COERCE:%.*]]
 //
 fixed_int32_t fixed_callee(fixed_int32_t x) {
   return x;
@@ -47,12 +41,9 @@ fixed_int32_t fixed_callee(fixed_int32_t x) {
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[COERCE1:%.*]] = alloca <16 x i32>, align 16
-// CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( [[X:%.*]], 
i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[CASTFIXEDSVE]], i64 0)
-// CHECK-NEXT:[[CALL:%.*]] = call  @fixed_callee( [[CASTSCALABLESVE]])
 // CHECK-NEXT:[[TMP0:%.*]] = bitcast <16 x i32>* [[COERCE1]] to *
-// CHECK-NEXT:store  [[CALL]], * 
[[TMP0]], align 16
-// CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], 
align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT:store  [[X:%.*]], * 
[[TMP0]], align 16
+// CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], 
align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-NEXT:[[CASTSCALABLESVE2:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[TMP1]], i64 0)
 // CHECK-NEXT:ret  [[CASTSCALABLESVE2]]
 //
@@ -66,15 +57,9 @@ svint32_t sizeless_caller(svint32_t x) {
 
 // CHECK-LABEL: @call_int32_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[OP1:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[OP1_COERCE:%.*]], i64 0)
-// CHECK-NEXT:[[OP2:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[OP2_COERCE:%.*]], i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[OP1]], i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE2:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[OP2]], i64 0)
 // CHECK-NEXT:[[TMP0:%.*]] = call  
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT:[[TMP1:%.*]] = call  
@llvm.aarch64.sve.sel.nxv4i32( [[TMP0]],  
[[CASTSCALABLESVE]],  [[CASTSCALABLESVE2]])
-// CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( [[TMP1]], 
i64 0)
-// CHECK-NEXT:[[CASTSCALABLESVE3:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[CASTFIXEDSVE]], i64 0)
-// CH

[clang] 1f2122c - [clang][SVE] Use __inline__ instead of inline in arm_sve.h

2021-02-18 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2021-02-18T17:09:46Z
New Revision: 1f2122c9b046a7d6d141f7c42528d8a3e2e66b70

URL: 
https://github.com/llvm/llvm-project/commit/1f2122c9b046a7d6d141f7c42528d8a3e2e66b70
DIFF: 
https://github.com/llvm/llvm-project/commit/1f2122c9b046a7d6d141f7c42528d8a3e2e66b70.diff

LOG: [clang][SVE] Use __inline__ instead of inline in arm_sve.h

The inline keyword is not defined in the C89 standard, so source files
that include arm_sve.h will fail compilation if -std=c89 is specified.
For consistency with arm_neon.h, we should use __inline__ instead.
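
A minimal illustration of the difference under strict C89 (the file and
function names are illustrative):

    /* clang -std=c89 -fsyntax-only demo.c */

    /* 'inline' is not a keyword in C89, so this would be a syntax error:
       inline int twice(int x) { return 2 * x; }                          */

    /* '__inline__' is a GNU/Clang extension accepted even in C89 mode: */
    static __inline__ int twice(int x) { return 2 * x; }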

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D96852

Added: 


Modified: 
clang/utils/TableGen/SveEmitter.cpp

Removed: 




diff  --git a/clang/utils/TableGen/SveEmitter.cpp 
b/clang/utils/TableGen/SveEmitter.cpp
index 0e69600ef861..fe369e9d9408 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1201,7 +1201,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
   OS << "};\n\n";
 
   OS << "/* Function attributes */\n";
-  OS << "#define __aio static inline __attribute__((__always_inline__, "
+  OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
 "__nodebug__, __overloadable__))\n\n";
 
   // Add reinterpret functions.





[clang] 1b1b30c - [clang][SVE] Don't warn on vector to sizeless builtin implicit conversion

2021-02-23 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2021-02-23T13:40:58Z
New Revision: 1b1b30cf0f7d9619afb32e16f4a7c007da4ffccf

URL: 
https://github.com/llvm/llvm-project/commit/1b1b30cf0f7d9619afb32e16f4a7c007da4ffccf
DIFF: 
https://github.com/llvm/llvm-project/commit/1b1b30cf0f7d9619afb32e16f4a7c007da4ffccf.diff

LOG: [clang][SVE] Don't warn on vector to sizeless builtin implicit conversion

This commit prevents warnings from -Wconversion when a clang vector type
is implicitly converted to a sizeless builtin type -- for example, when
implicitly converting a fixed-predicate to a scalable predicate.

The code below:

#include <arm_sve.h>

#define N __ARM_FEATURE_SVE_BITS
#define FIXED_ATTR __attribute__((arm_sve_vector_bits (N)))
typedef svbool_t fixed_svbool_t FIXED_ATTR;

inline fixed_svbool_t foo(fixed_svbool_t p) {
  return svnot_z(svptrue_b64(), p);
}

would previously raise this warning:

warning: implicit conversion turns vector to scalar: \
'fixed_svbool_t' (vector of 8 'unsigned char' values) to 'svbool_t' \
(aka '__SVBool_t') [-Wconversion]

Note that many cases of these implicit conversions were already
permitted because many functions inside arm_sve.h are spawned via
preprocessor macros, and the call to isInSystemMacro would cover us in
this case. This commit fixes the remaining cases.

Differential Revision: https://reviews.llvm.org/D97053

Added: 


Modified: 
clang/lib/Sema/SemaChecking.cpp
clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2d3d36f4adad..b41d94361d50 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12051,7 +12051,16 @@ static void CheckImplicitConversion(Sema &S, Expr *E, 
QualType T,
 checkObjCDictionaryLiteral(S, QualType(Target, 0), DictionaryLiteral);
 
   // Strip vector types.
-  if (isa<VectorType>(Source)) {
+  if (const auto *SourceVT = dyn_cast<VectorType>(Source)) {
+if (Target->isVLSTBuiltinType()) {
+  auto SourceVectorKind = SourceVT->getVectorKind();
+  if (SourceVectorKind == VectorType::SveFixedLengthDataVector ||
+  SourceVectorKind == VectorType::SveFixedLengthPredicateVector ||
+  (SourceVectorKind == VectorType::GenericVector &&
+   S.Context.getTypeSize(Source) == S.getLangOpts().ArmSveVectorBits))
+return;
+}
+
     if (!isa<VectorType>(Target)) {
   if (S.SourceMgr.isInSystemMacro(CC))
 return;

diff  --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp 
b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
index 5e796b7c8995..0437a264f65b 100644
--- a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 
-msve-vector-bits=512 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 
-msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include <arm_sve.h>





[clang] 3d5b18a - [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments

2021-01-05 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2021-01-05T15:18:21Z
New Revision: 3d5b18a3fdf47ae2286642131e4a92968dd01c2a

URL: 
https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a
DIFF: 
https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments

VLST arguments are coerced to VLATs at the function boundary for
consistency with the VLAT ABI. They are then bitcast back to VLSTs in
the function prolog. Previously, this conversion was done through memory.
With the introduction of the llvm.experimental.vector.{insert,extract}
intrinsics, we can avoid going through memory here.
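
A small C sketch of a function whose VLST parameter arrives coerced to a VLAT
(roughly the shape of the attr-arm-sve-vector-bits-call.c tests; the names are
illustrative). With this patch, the prolog recovers the VLST with a single
llvm.experimental.vector.extract instead of a store/load pair:

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));

    /* The argument is passed as a scalable <vscale x 4 x i32> register;
       the fixed-length part is now extracted from it directly. */
    fixed_int32_t passthrough(fixed_int32_t x) { return x; }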

Depends on D92761

Differential Revision: https://reviews.llvm.org/D92762

Added: 


Modified: 
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e28736bd3d2f..f1987408165b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2688,6 +2688,27 @@ void CodeGenFunction::EmitFunctionProlog(const 
CGFunctionInfo &FI,
 break;
   }
 
+  // VLST arguments are coerced to VLATs at the function boundary for
+  // ABI consistency. If this is a VLST that was coerced to
+  // a VLAT at the function boundary and the types match up, use
+  // llvm.experimental.vector.extract to convert back to the original
+  // VLST.
+      if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
+auto *Coerced = Fn->getArg(FirstIRArg);
+        if (auto *VecTyFrom =
+                dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
+  if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
+llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+
+assert(NumIRArgs == 1);
+Coerced->setName(Arg->getName() + ".coerce");
+
ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
+VecTyTo, Coerced, Zero, "castFixedSve")));
+break;
+  }
+}
+  }
+
   Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
  Arg->getName());
 

diff  --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index 9b733d21dbbb..9d3206a75f0e 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -59,14 +59,14 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 
 // CHECK-LABEL: define{{.*}} void @f2(
 // CHECK-SAME:   <[[#div(VBITS,8)]] x i8>* noalias nocapture 
sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, <[[#div(VBITS,8)]] x i8>* 
nocapture readonly %0)
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:[[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, 
<[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT:[[TMP1:%.*]] = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8( undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT:[[TMP2:%.*]] = call  
@llvm.aarch64.sve.asrd.nxv16i8( [[TMP1]],  
[[CASTSCALABLESVE]], i32 1)
-// CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> 
@llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8( [[TMP2]], i64 0)
-// CHECK-NEXT:store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], 
<[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
-// CHECK-NEXT:ret void
+// CHECK-NEXT: entry:
+// CHECK-NEXT:   [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] 
x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT:   [[TMP1:%.*]] = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8( undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
+// CHECK-NEXT:   [[TMP2:%.*]] = call  
@llvm.aarch64.sve.asrd.nxv16i8( [[TMP1]],  
[[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT:   [[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> 
@llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8( [[TMP2]], i64 0)
+// CHECK-NEXT:   store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], 
<[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
+// CHECK-NEXT:   ret void
 vec_int8 f2(vec_int8 x) { return svasrd_x(svptrue_b8(), x, 1); }
 #endif
 
@@ -78,24 +78,18 @@ void f3(vec1);
 typedef svint8_t vec2 __attrib

[clang] 8ea72b3 - [clang][AArch64][SVE] Avoid going through memory for coerced VLST return values

2021-01-11 Thread Joe Ellis via cfe-commits

Author: Joe Ellis
Date: 2021-01-11T12:10:59Z
New Revision: 8ea72b388734ce660f861e0dfbe53d203e94876a

URL: 
https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a
DIFF: 
https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST return 
values

VLST return values are coerced to VLATs in the function epilog for
consistency with the VLAT ABI. Previously, this coercion was done
through memory. It is preferable to use the
llvm.experimental.vector.insert intrinsic to avoid going through memory
here.
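
The return-value counterpart of the previous change, sketched in C (this is
essentially the shape of the test updated below; the names are illustrative):
the fixed-length result is widened into an undef scalable vector with one
llvm.experimental.vector.insert in the epilog instead of a stack round trip.

    #include <arm_sve.h>

    #define N __ARM_FEATURE_SVE_BITS
    typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));

    /* The fixed-length sum is inserted into an undef <vscale x 4 x i32>
       value to form the coerced return. */
    vec add(vec x, vec y) { return x + y; }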

Reviewed By: c-rhodes

Differential Revision: https://reviews.llvm.org/D94290

Added: 


Modified: 
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index f1987408165b..2cc7203d1194 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1265,6 +1265,21 @@ static llvm::Value *CreateCoercedLoad(Address Src, 
llvm::Type *Ty,
 return CGF.Builder.CreateLoad(Src);
   }
 
+  // If coercing a fixed vector to a scalable vector for ABI compatibility, and
+  // the types match, use the llvm.experimental.vector.insert intrinsic to
+  // perform the conversion.
+  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+  if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+auto *Load = CGF.Builder.CreateLoad(Src);
+auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, 
Zero,
+  "castScalableSve");
+  }
+}
+  }
+
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp =
   CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(), Src.getName());

diff  --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 6fafc2ca2db9..a808d50884ea 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -48,14 +48,11 @@ void test02() {
 // CHECK-SAME:[[#VBITS]]
 // CHECK-SAME:EES_( %x.coerce,  
%y.coerce)
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   [[RETVAL_COERCE:%.*]] = alloca , align 16
 // CHECK-NEXT:   [[X:%.*]] = call <[[#div(VBITS, 32)]] x i32> 
@llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32( [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:   [[Y:%.*]] = call <[[#div(VBITS, 32)]] x i32> 
@llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32( [[X_COERCE1:%.*]], i64 0)
 // CHECK-NEXT:   [[ADD:%.*]] = add <[[#div(VBITS, 32)]] x i32> [[Y]], [[X]]
-// CHECK-NEXT:   [[RETVAL_0__SROA_CAST:%.*]] = bitcast * 
[[RETVAL_COERCE]] to <[[#div(VBITS, 32)]] x i32>*
-// CHECK-NEXT:   store <[[#div(VBITS, 32)]] x i32> [[ADD]], <[[#div(VBITS, 
32)]] x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:   [[TMP0:%.*]] = load , * 
[[RETVAL_COERCE]], align 16
-// CHECK-NEXT:   ret  [[TMP0]]
+// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32( undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
+// CHECK-NEXT:   ret  [[CASTSCALABLESVE]]
 typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));
 auto f(vec x, vec y) { return x + y; } // Returns a vec.
 #endif

diff  --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c 
b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
index f909b7164622..f988d54bacd4 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -24,14 +24,11 @@ svint32_t sizeless_callee(svint32_t x) {
 
 // CHECK-LABEL: @fixed_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16
 // CHECK-NEXT:[[X:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[X]], i64 0)
 // CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT:[[RETVAL_0__SROA_CAST:%.*]] = bitcast * 
[[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* 
[[RETVAL_0__SROA_CAST]], align 16
-// C