https://github.com/eopXD updated https://github.com/llvm/llvm-project/pull/67018
>From a26eb9fe0c09fc0fd504d261874630d8b5edae26 Mon Sep 17 00:00:00 2001 From: eopXD <yueh.ting.c...@gmail.com> Date: Thu, 21 Sep 2023 06:34:57 -0700 Subject: [PATCH] [Clang][RISCV] Handle RVV tuple types correctly as OutputOperand for inline asm The RVV tuple type maps to an aggregate type with homogeneous scalable vectors. EmitAsmStmt does not handle this correctly and this commit attempts to fix it. Expressing the type as a structure in inline asm calls will complicate the current code base, so instead, the return type is set to be a single scalable vector, then reconstructed with `vector.extract` and `insertvalue`. A follow-up commit will deal with details when associated with InputOperands. --- clang/lib/CodeGen/CGStmt.cpp | 69 ++++++++++++++++++- .../rvv-inline-asm.c | 41 +++++++++++ 2 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 6674aa2409a5947..948539ea546084e 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/PrettyStackTrace.h" @@ -29,10 +30,13 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include <optional> @@ -2392,6 +2396,26 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); } else if (TruncTy->isVectorTy()) { Tmp = Builder.CreateBitCast(Tmp, TruncTy); 
+ } else if (TruncTy->isStructTy() && ResultRegQualTys[i]->isRVVType()) { + auto *STy = cast<llvm::StructType>(TruncTy); + auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0)); + + assert(STy->containsHomogeneousScalableVectorTypes() && + "Must be dealing with RVV tuple type"); + + unsigned MinElts = VTy->getElementCount().getKnownMinValue(); + llvm::Value *StructValue = llvm::PoisonValue::get(STy); + + for (unsigned Idx = 0, TupleSize = STy->getNumElements(); + Idx != TupleSize; ++Idx) { + llvm::Value *IdxValue = + llvm::ConstantInt::get(CGM.Int64Ty, Idx * MinElts); + llvm::Value *SubVec = Builder.CreateExtractVector(VTy, Tmp, IdxValue); + + StructValue = Builder.CreateInsertValue(StructValue, SubVec, Idx); + } + + Tmp = StructValue; } } @@ -2399,7 +2423,13 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, // ResultTypeRequiresCast elements correspond to the first // ResultTypeRequiresCast.size() elements of RegResults. if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { - unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); + unsigned Size; + if (ResultRegQualTys[i]->isRVVType() && TruncTy->isStructTy()) { + Size = cast<llvm::ScalableVectorType>( + cast<llvm::StructType>(TruncTy)->getElementType(0)) + ->getScalarSizeInBits(); + } else + Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]); if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) { Builder.CreateStore(Tmp, A); @@ -2524,11 +2554,32 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultRegIsFlagReg.push_back(IsFlagReg); llvm::Type *Ty = ConvertTypeForMem(QTy); + ResultTruncRegTypes.push_back(Ty); + + // Expressing the type as a structure in inline asm calls will complicate + // the current code base, so instead, the return type is set to be a + // single scalable vector, then reconstructed with `vector.extract` and + // `insertvalue`. 
The type is derived here, and the reconstruction is done + // under EmitAsmStores. + if (QTy->isRVVType() && isa<llvm::StructType>(Ty)) { + // Flatten the structure into a single ScalableVectorType + auto *STy = cast<llvm::StructType>(Ty); + assert(STy->containsHomogeneousScalableVectorTypes() && + isa<llvm::ScalableVectorType>(STy->getElementType(0)) && + "Dealing with RVV tuple (aggregate with homogeneous scalable " + "vectors"); + + auto *VecTy = cast<llvm::ScalableVectorType>(STy->getElementType(0)); + + Ty = llvm::ScalableVectorType::get(VecTy->getScalarType(), + STy->getNumElements() * + VecTy->getMinNumElements()); + } + const bool RequiresCast = Info.allowsRegister() && (getTargetHooks().isScalarizableAsmOperand(*this, Ty) || Ty->isAggregateType()); - ResultTruncRegTypes.push_back(Ty); ResultTypeRequiresCast.push_back(RequiresCast); if (RequiresCast) { @@ -2551,6 +2602,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { QualType InputTy = S.getInputExpr(InputNo)->getType(); QualType OutputType = OutExpr->getType(); + if ((InputTy->isRVVType() && + isa<llvm::StructType>(ConvertType(InputTy))) || + (OutputType->isRVVType() && + isa<llvm::StructType>(ConvertType(OutputType)))) { + llvm_unreachable("FIXME: Deal with RVV type matching."); + } + uint64_t InputSize = getContext().getTypeSize(InputTy); if (getContext().getTypeSize(OutputType) < InputSize) { // Form the asm to return the value as a larger integer or fp type. 
@@ -2671,6 +2729,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { QualType OutputType = S.getOutputExpr(Output)->getType(); QualType InputTy = InputExpr->getType(); + if ((InputTy->isRVVType() && + isa<llvm::StructType>(ConvertType(InputTy))) || + (OutputType->isRVVType() && + isa<llvm::StructType>(ConvertType(OutputType)))) { + llvm_unreachable("FIXME: Deal with RVV type matching."); + } + if (getContext().getTypeSize(OutputType) > getContext().getTypeSize(InputTy)) { // Use ptrtoint as appropriate so that we can do our extension. diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c new file mode 100644 index 000000000000000..cad4f8ed5dcbd48 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c @@ -0,0 +1,41 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 +#include <riscv_vector.h> + +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s + +// CHECK-LABEL: define dso_local void @foo( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> asm "#NOP", "=^vr"() #[[ATTR2:[0-9]+]], !srcloc !4 +// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1 +// CHECK-NEXT: ret void +// +void foo() { + vint32m1x2_t v0; + asm ("#NOP" : "=vr" (v0)); +} + +// CHECK-LABEL: define dso_local 
void @bar( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } asm "#NOP", "=^vr,=^vr"() #[[ATTR2]], !srcloc !5 +// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP5]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP6]], <vscale x 2 x i32> [[TMP7]], 1 +// CHECK-NEXT: ret void +// +void bar() { + vint32m1x2_t v0, v2; + asm ("#NOP" : "=vr" (v0), "=vr" (v2)); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits