https://github.com/eopXD updated https://github.com/llvm/llvm-project/pull/67018
>From dc77e5c7bca87badecc314b24cab4c10be0e02fa Mon Sep 17 00:00:00 2001
From: eopXD <yueh.ting.c...@gmail.com>
Date: Thu, 21 Sep 2023 06:34:57 -0700
Subject: [PATCH] [Clang][RISCV] Handle RVV tuple types correctly as
 OutputOperand for inline asm

The RVV tuple type maps to an aggregate type with homogeneous scalable
vectors. EmitAsmStmt does not handle this correctly, and this commit
fixes it.

Expressing the type as a structure in inline asm calls would complicate
the current code base, so instead the return type is set to a single
scalable vector, which is then reconstructed with `vector.extract` and
`insertvalue`.

A follow-up commit will deal with the details of RVV tuple types as
InputOperands.
---
 clang/lib/CodeGen/CGStmt.cpp                  | 52 ++++++++++++++++++-
 .../rvv-inline-asm.c                          | 41 +++++++++++++++
 2 files changed, 91 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6674aa2409a5947..755f30b9c9e6369 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -29,6 +29,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
@@ -2392,6 +2393,26 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
       Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
     } else if (TruncTy->isVectorTy()) {
       Tmp = Builder.CreateBitCast(Tmp, TruncTy);
+    } else if (TruncTy->isStructTy() && ResultRegQualTys[i]->isRVVType()) {
+      auto *STy = cast<llvm::StructType>(TruncTy);
+      auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+      assert(STy->containsHomogeneousScalableVectorTypes() &&
+             "Must be dealing with RVV tuple type");
+
+      unsigned MinElts = VTy->getElementCount().getKnownMinValue();
+      llvm::Value *StructValue = llvm::PoisonValue::get(STy);
+
+      for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+           Idx != TupleSize; ++Idx) {
+        llvm::Value *IdxValue =
+            llvm::ConstantInt::get(CGM.Int64Ty, Idx * MinElts);
+        llvm::Value *SubVec = Builder.CreateExtractVector(VTy, Tmp, IdxValue);
+
+        StructValue = Builder.CreateInsertValue(StructValue, SubVec, Idx);
+      }
+
+      Tmp = StructValue;
     }
   }
 
@@ -2399,7 +2420,13 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     // ResultTypeRequiresCast elements correspond to the first
     // ResultTypeRequiresCast.size() elements of RegResults.
     if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
-      unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
+      unsigned Size;
+      if (ResultRegQualTys[i]->isRVVType() && TruncTy->isStructTy()) {
+        Size = cast<llvm::ScalableVectorType>(
+                   cast<llvm::StructType>(TruncTy)->getElementType(0))
+                   ->getScalarSizeInBits();
+      } else
+        Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
       Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
       if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
         Builder.CreateStore(Tmp, A);
@@ -2524,11 +2551,32 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
       ResultRegIsFlagReg.push_back(IsFlagReg);
 
       llvm::Type *Ty = ConvertTypeForMem(QTy);
+      ResultTruncRegTypes.push_back(Ty);
+
+      // Expressing the type as a structure in inline asm calls will complicate
+      // the current code base, so instead, the return type is set to be a
+      // single scalable vector, then reconstructed with `vector.extract` and
+      // `insertvalue`. The type is derived here, and the reconstruction is
+      // done under EmitAsmStores.
+      if (QTy->isRVVType() && isa<llvm::StructType>(Ty)) {
+        // Flatten the structure into a single ScalableVectorType.
+        auto *STy = cast<llvm::StructType>(Ty);
+        assert(STy->containsHomogeneousScalableVectorTypes() &&
+               isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+               "Dealing with RVV tuple (aggregate with homogeneous scalable "
+               "vectors)");
+
+        auto *VecTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+        Ty = llvm::ScalableVectorType::get(VecTy->getScalarType(),
+                                           STy->getNumElements() *
+                                               VecTy->getMinNumElements());
+      }
+
       const bool RequiresCast = Info.allowsRegister() &&
           (getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
            Ty->isAggregateType());
 
-      ResultTruncRegTypes.push_back(Ty);
       ResultTypeRequiresCast.push_back(RequiresCast);
 
       if (RequiresCast) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
new file mode 100644
index 000000000000000..cad4f8ed5dcbd48
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -0,0 +1,41 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+#include <riscv_vector.h>
+
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @foo(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> asm "#NOP", "=^vr"() #[[ATTR2:[0-9]+]], !srcloc !4
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT:    ret void
+//
+void foo() {
+  vint32m1x2_t v0;
+  asm ("#NOP" : "=vr" (v0));
+}
+
+// CHECK-LABEL: define dso_local void @bar(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } asm "#NOP", "=^vr,=^vr"() #[[ATTR2]], !srcloc !5
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP5]], 0
+// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 2)
+// CHECK-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP6]], <vscale x 2 x i32> [[TMP7]], 1
+// CHECK-NEXT:    ret void
+//
+void bar() {
+  vint32m1x2_t v0, v2;
+  asm ("#NOP" : "=vr" (v0), "=vr" (v2));
+}
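
Concretely, the reconstruction this patch emits for a single `vint32m1x2_t`
output operand under zve32x looks roughly like the IR below (a minimal
sketch mirroring the `foo` test above; value names are illustrative).
`vint32m1x2_t` lowers to { <vscale x 2 x i32>, <vscale x 2 x i32> }, which
the patch flattens into a single <vscale x 4 x i32> asm result:

  ; The asm call returns one flattened scalable vector instead of the
  ; tuple's aggregate type.
  %flat = call <vscale x 4 x i32> asm "#NOP", "=^vr"()
  ; Each tuple element is sliced back out at index 0 and MinElts (= 2),
  ; then inserted into the aggregate.
  %e0 = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %flat, i64 0)
  %t0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %e0, 0
  %e1 = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %flat, i64 2)
  %v0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %t0, <vscale x 2 x i32> %e1, 1

The extraction indices step by the subvector's minimum element count because
`llvm.vector.extract` requires the index to be a constant multiple of the
result type's known minimum vector length.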