llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Bill Wendling (bwendling) <details> <summary>Changes</summary> Implement Justin's suggestion, which was far better than what we had. --- Patch is 48.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/199377.diff 2 Files Affected: - (modified) clang/lib/CodeGen/CGStmt.cpp (+415-352) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+2-80) ``````````diff diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index b70667d04d1f6..30756180ebafa 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2605,185 +2605,242 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, return llvm::MDNode::get(CGF.getLLVMContext(), Locs); } -void CodeGenFunction::UpdateAsmCallInst( - const AsmStmt &S, llvm::CallBase &Result, const AsmConstraintsInfo &AsmInfo, - bool HasSideEffect, bool HasUnwindClobber, bool NoMerge, bool NoConvergent, - std::vector<llvm::Value *> &RegResults) { - if (!HasUnwindClobber) - Result.addFnAttr(llvm::Attribute::NoUnwind); +namespace clang { - if (NoMerge) - Result.addFnAttr(llvm::Attribute::NoMerge); +/// This structure holds the information gathered about the constraints for an +/// inline assembly statement. It helps in separating the constraint processing +/// from the code generation. +class AsmConstraintsInfo { + CodeGenFunction &CGF; + CodeGenModule &CGM; // Per-module state. + const AsmStmt &S; + CGBuilderTy &Builder; - // Attach readnone and readonly attributes. - if (!HasSideEffect) { - if (AsmInfo.ReadNone) - Result.setDoesNotAccessMemory(); - else if (AsmInfo.ReadOnly) - Result.setOnlyReadsMemory(); - } + // The final asm string. + std::string AsmString; - // Add elementtype attribute for indirect constraints. - for (auto Pair : llvm::enumerate(AsmInfo.ArgElemTypes)) { - if (Pair.value()) { - auto Attr = llvm::Attribute::get( - getLLVMContext(), llvm::Attribute::ElementType, Pair.value()); - Result.addParamAttr(Pair.index(), Attr); - } - } + // The output and input constraints. + SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos; + SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos; - // Slap the source location of the inline asm into a !srcloc metadata on the - // call. - const StringLiteral *SL; - if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S); - gccAsmStmt && - (SL = dyn_cast<StringLiteral>(gccAsmStmt->getAsmStringExpr()))) { - Result.setMetadata("srcloc", getAsmSrcLocInfo(SL, *this)); - } else { - // At least put the line number on MS inline asm blobs and GCC asm constexpr - // strings. - llvm::Constant *Loc = - llvm::ConstantInt::get(Int64Ty, S.getAsmLoc().getRawEncoding()); - Result.setMetadata("srcloc", - llvm::MDNode::get(getLLVMContext(), - llvm::ConstantAsMetadata::get(Loc))); - } + // Constraint strings. + std::string Constraints; + std::string InOutConstraints; - // Make inline-asm calls Key for the debug info feature Key Instructions. - addInstToNewSourceAtom(&Result, nullptr); + // Keep track of out constraints for tied input operand. + std::vector<std::string> OutputConstraints; - if (!NoConvergent && getLangOpts().assumeFunctionsAreConvergent()) - // Conservatively, mark all inline asm blocks in CUDA or OpenCL as - // convergent (meaning, they may call an intrinsically convergent op, such - // as bar.sync, and so can't have certain optimizations applied around - // them) unless it's explicitly marked 'noconvergent'. - Result.addFnAttr(llvm::Attribute::Convergent); - // Extract all of the register value results from the asm. - if (AsmInfo.ResultRegTypes.size() == 1) { - RegResults.push_back(&Result); - } else { - for (unsigned i = 0, e = AsmInfo.ResultRegTypes.size(); i != e; ++i) { - llvm::Value *Tmp = Builder.CreateExtractValue(&Result, i, "asmresult"); - RegResults.push_back(Tmp); - } - } -} + // Keep track of argument types. + std::vector<llvm::Value *> Args; + std::vector<llvm::Type *> ArgTypes; + std::vector<llvm::Type *> ArgElemTypes; -void CodeGenFunction::EmitAsmStores( - const AsmStmt &S, const llvm::ArrayRef<llvm::Value *> RegResults, - const AsmConstraintsInfo &AsmInfo) { - llvm::LLVMContext &CTX = getLLVMContext(); + // Keep track of result register constraints. + std::vector<LValue> ResultRegDests; + std::vector<QualType> ResultRegQualTys; + std::vector<llvm::Type *> ResultRegTypes; + std::vector<llvm::Type *> ResultTruncRegTypes; - assert(RegResults.size() == AsmInfo.ResultRegTypes.size()); - assert(RegResults.size() == AsmInfo.ResultTruncRegTypes.size()); - assert(RegResults.size() == AsmInfo.ResultRegDests.size()); + llvm::BitVector ResultTypeRequiresCast; - // ResultRegDests can also be populated by addReturnRegisterOutputs() above, - // in which case its size may grow. - assert(AsmInfo.ResultTypeRequiresCast.size() <= - AsmInfo.ResultRegDests.size()); - assert(AsmInfo.ResultBounds.size() <= AsmInfo.ResultRegDests.size()); + // Keep track of in/out constraints. + std::vector<llvm::Value *> InOutArgs; + std::vector<llvm::Type *> InOutArgTypes; + std::vector<llvm::Type *> InOutArgElemTypes; - for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { - llvm::Value *Tmp = RegResults[i]; - llvm::Type *TruncTy = AsmInfo.ResultTruncRegTypes[i]; + // Destination blocks for 'asm gotos'. + llvm::BasicBlock *DefaultDest = nullptr; + SmallVector<llvm::BasicBlock *, 3> IndirectDests; - if (i < AsmInfo.ResultBounds.size() && - AsmInfo.ResultBounds[i].has_value()) { - const auto [LowerBound, UpperBound] = AsmInfo.ResultBounds[i].value(); + std::vector<std::optional<std::pair<unsigned, unsigned>>> ResultBounds; - // FIXME: Support for nonzero lower bounds not yet implemented. - assert(LowerBound == 0 && "Output operand lower bound is not zero."); + // An inline asm can be marked readonly if it meets the following + // conditions: + // + // - it doesn't have any sideeffects + // - it doesn't clobber memory + // - it doesn't return a value by-reference + // + // It can be marked readnone if it doesn't have any input memory + // constraints in addition to meeting the conditions listed above. + bool ReadOnly = true; + bool ReadNone = true; + + bool GetOutputAndInputConstraints(); + void HandleOutputConstraints(); + void HandleMSStyleAsmBlob(); + void HandleInputConstraints(); + bool HandleLabels(); + bool HandleClobbers(); + void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, + bool HasUnwindClobber, bool NoMerge, bool NoConvergent, + std::vector<llvm::Value *> &RegResults); + void EmitAsmStores(const llvm::ArrayRef<llvm::Value *> RegResults); + + void EmitHipStdParUnsupportedAsm() { + constexpr auto Name = "__ASM__hipstdpar_unsupported"; + + std::string Asm; + if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S)) + Asm = GCCAsm->getAsmString(); + + auto &Ctx = getLLVMContext(); + auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm); + auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), + {StrTy->getType()}, false); + auto UBF = CGM.getModule().getOrInsertFunction(Name, FnTy); + + Builder.CreateCall(UBF, {StrTy}); + } + + ASTContext &getContext() { return CGF.getContext(); } + llvm::LLVMContext &getLLVMContext() { return CGF.getLLVMContext(); } + const TargetInfo &getTarget() const { return CGF.getTarget(); } + const LangOptions &getLangOpts() const { return CGF.getLangOpts(); } + const TargetCodeGenInfo &getTargetHooks() const { + return CGM.getTargetCodeGenInfo(); + } + +public: + AsmConstraintsInfo(CodeGenFunction &CGF, const AsmStmt &S) + : CGF(CGF), CGM(CGF.CGM), S(S), Builder(CGF.Builder), + AsmString(S.generateAsmString(CGF.getContext())) {} + + void EmitAsmStmt(); +}; - llvm::Constant *UpperBoundConst = - llvm::ConstantInt::get(Tmp->getType(), UpperBound); - llvm::Value *IsBooleanValue = - Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, UpperBoundConst); - llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); +} // namespace clang - Builder.CreateCall(FnAssume, IsBooleanValue); - } +void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { + // Pop all cleanup blocks at the end of the asm statement. + CodeGenFunction::RunCleanupsScope Cleanups(*this); - // If the result type of the LLVM IR asm doesn't match the result type of - // the expression, do the conversion. - if (AsmInfo.ResultRegTypes[i] != TruncTy) { - // Truncate the integer result to the right size, note that TruncTy can be - // a pointer. - if (TruncTy->isFloatingPointTy()) - Tmp = Builder.CreateFPTrunc(Tmp, TruncTy); - else if (TruncTy->isPointerTy() && Tmp->getType()->isIntegerTy()) { - uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy); - Tmp = Builder.CreateTrunc( - Tmp, llvm::IntegerType::get(CTX, (unsigned)ResSize)); - Tmp = Builder.CreateIntToPtr(Tmp, TruncTy); - } else if (Tmp->getType()->isPointerTy() && TruncTy->isIntegerTy()) { - uint64_t TmpSize = - CGM.getDataLayout().getTypeSizeInBits(Tmp->getType()); - Tmp = Builder.CreatePtrToInt( - Tmp, llvm::IntegerType::get(CTX, (unsigned)TmpSize)); - Tmp = Builder.CreateTrunc(Tmp, TruncTy); - } else if (Tmp->getType()->isIntegerTy() && TruncTy->isIntegerTy()) { - Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); - } else if (Tmp->getType()->isVectorTy() || TruncTy->isVectorTy()) { - Tmp = Builder.CreateBitCast(Tmp, TruncTy); - } - } + // Get all the output and input constraints together. + AsmConstraintsInfo AsmInfo(*this, S); + AsmInfo.EmitAsmStmt(); +} - ApplyAtomGroup Grp(getDebugInfo()); - LValue Dest = AsmInfo.ResultRegDests[i]; +void AsmConstraintsInfo::EmitAsmStmt() { + if (!GetOutputAndInputConstraints()) + return EmitHipStdParUnsupportedAsm(); - // ResultTypeRequiresCast elements correspond to the first - // ResultTypeRequiresCast.size() elements of RegResults. - if (i < AsmInfo.ResultTypeRequiresCast.size() && - AsmInfo.ResultTypeRequiresCast[i]) { - unsigned Size = getContext().getTypeSize(AsmInfo.ResultRegQualTys[i]); - Address A = Dest.getAddress().withElementType(AsmInfo.ResultRegTypes[i]); + // Handle output constraints. + HandleOutputConstraints(); - if (getTargetHooks().isScalarizableAsmOperand(*this, TruncTy)) { - llvm::StoreInst *S = Builder.CreateStore(Tmp, A); - addInstToCurrentSourceAtom(S, S->getValueOperand()); - continue; - } + // If this is a Microsoft-style asm blob, store the return registers (EAX:EDX) + // to the return value slot. Only do this when returning in registers. + HandleMSStyleAsmBlob(); - QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed=*/false); - if (Ty.isNull()) { - const Expr *OutExpr = S.getOutputExpr(i); - CGM.getDiags().Report(OutExpr->getExprLoc(), - diag::err_store_value_to_reg); - return; - } + // Handle input constraints. + HandleInputConstraints(); - Dest = MakeAddrLValue(A, Ty); - } + // Handle 'asm goto' labels. + bool IsGCCAsmGoto = HandleLabels(); - EmitStoreThroughLValue(RValue::get(Tmp), Dest); + // Handle any clobbers. + bool HasUnwindClobber = HandleClobbers(); + assert(!(HasUnwindClobber && IsGCCAsmGoto) && + "unwind clobber can't be used with asm goto"); + + // Add machine specific clobbers + std::string_view MachineClobbers = getTarget().getClobbers(); + if (!MachineClobbers.empty()) { + if (!Constraints.empty()) + Constraints += ','; + Constraints += MachineClobbers; } -} -static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF, - const AsmStmt &S) { - constexpr auto Name = "__ASM__hipstdpar_unsupported"; + llvm::Type *ResultType; + if (ResultRegTypes.empty()) + ResultType = CGF.VoidTy; + else if (ResultRegTypes.size() == 1) + ResultType = ResultRegTypes[0]; + else + ResultType = llvm::StructType::get(getLLVMContext(), ResultRegTypes); + + llvm::FunctionType *FTy = + llvm::FunctionType::get(ResultType, ArgTypes, false); + + bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0; + + llvm::InlineAsm::AsmDialect GnuAsmDialect = + CGM.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT + ? llvm::InlineAsm::AD_ATT + : llvm::InlineAsm::AD_Intel; + llvm::InlineAsm::AsmDialect AsmDialect = + isa<MSAsmStmt>(&S) ? llvm::InlineAsm::AD_Intel : GnuAsmDialect; + + llvm::InlineAsm *IA = llvm::InlineAsm::get( + FTy, AsmString, Constraints, HasSideEffect, + /* IsAlignStack */ false, AsmDialect, HasUnwindClobber); + std::vector<llvm::Value *> RegResults; + llvm::CallBrInst *CBR; + llvm::DenseMap<llvm::BasicBlock *, SmallVector<llvm::Value *, 4>> + CBRRegResults; + + if (IsGCCAsmGoto) { + CBR = Builder.CreateCallBr(IA, DefaultDest, IndirectDests, Args); + CGF.EmitBlock(DefaultDest); + UpdateAsmCallInst(*CBR, HasSideEffect, + /*HasUnwindClobber=*/false, CGF.InNoMergeAttributedStmt, + CGF.InNoConvergentAttributedStmt, RegResults); + + // Because we are emitting code top to bottom, we don't have enough + // information at this point to know precisely whether we have a critical + // edge. If we have outputs, split all indirect destinations. + if (!RegResults.empty()) { + unsigned I = 0; + for (llvm::BasicBlock *Dest : CBR->getIndirectDests()) { + llvm::Twine SynthName = Dest->getName() + ".split"; + llvm::BasicBlock *SynthBB = CGF.createBasicBlock(SynthName); + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(SynthBB); + + if (ResultRegTypes.size() == 1) { + CBRRegResults[SynthBB].push_back(CBR); + } else { + for (unsigned J = 0, E = ResultRegTypes.size(); J != E; ++J) { + llvm::Value *Tmp = Builder.CreateExtractValue(CBR, J, "asmresult"); + CBRRegResults[SynthBB].push_back(Tmp); + } + } - std::string Asm; - if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S)) - Asm = GCCAsm->getAsmString(); + CGF.EmitBranch(Dest); + CGF.EmitBlock(SynthBB); + CBR->setIndirectDest(I++, SynthBB); + } + } + } else if (HasUnwindClobber) { + llvm::CallBase *Result = CGF.EmitCallOrInvoke(IA, Args, ""); + UpdateAsmCallInst(*Result, HasSideEffect, + /*HasUnwindClobber=*/true, CGF.InNoMergeAttributedStmt, + CGF.InNoConvergentAttributedStmt, RegResults); + } else { + llvm::CallInst *Result = + Builder.CreateCall(IA, Args, CGF.getBundlesForFunclet(IA)); + UpdateAsmCallInst(*Result, HasSideEffect, + /*HasUnwindClobber=*/false, CGF.InNoMergeAttributedStmt, + CGF.InNoConvergentAttributedStmt, RegResults); + } - auto &Ctx = CGF->CGM.getLLVMContext(); - auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm); - auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), - {StrTy->getType()}, false); - auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + EmitAsmStores(RegResults); - CGF->Builder.CreateCall(UBF, {StrTy}); + // If this is an asm goto with outputs, repeat EmitAsmStores, but with a + // different insertion point; one for each indirect destination and with + // CBRRegResults rather than RegResults. + if (IsGCCAsmGoto && !CBRRegResults.empty()) { + for (llvm::BasicBlock *Succ : CBR->getIndirectDests()) { + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(Succ, --(Succ->end())); + EmitAsmStores(CBRRegResults[Succ]); + } + } } /// Gather and validate the output and input constraints for the given inline /// assembly statement. This ensures that the constraints are valid for the /// target and prepares them for further processing. -bool CodeGenFunction::GetOutputAndInputConstraints( - const AsmStmt &S, - SmallVectorImpl<TargetInfo::ConstraintInfo> &OutputConstraintInfos, - SmallVectorImpl<TargetInfo::ConstraintInfo> &InputConstraintInfos) { +bool AsmConstraintsInfo::GetOutputAndInputConstraints() { bool IsValidTargetAsm = true; bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; for (unsigned I = 0, E = S.getNumOutputs(); I != E && IsValidTargetAsm; I++) { @@ -2826,18 +2883,17 @@ bool CodeGenFunction::GetOutputAndInputConstraints( /// handles the complexity of determining whether an output should be a /// register or memory operand, manages tied operands, and prepares the /// necessary arguments for the LLVM inline asm call. -void CodeGenFunction::HandleOutputConstraints(const AsmStmt &S, - AsmConstraintsInfo &AsmInfo) { +void AsmConstraintsInfo::HandleOutputConstraints() { // Keep track of defined physregs. llvm::SmallSet<std::string, 8> PhysRegOutputs; for (unsigned I = 0, E = S.getNumOutputs(); I != E; I++) { - TargetInfo::ConstraintInfo &Info = AsmInfo.OutputConstraintInfos[I]; + TargetInfo::ConstraintInfo &Info = OutputConstraintInfos[I]; // Simplify the output constraint. std::string OutputConstraint(S.getOutputConstraint(I)); OutputConstraint = getTarget().simplifyConstraint( - StringRef(OutputConstraint).substr(1), &AsmInfo.OutputConstraintInfos); + StringRef(OutputConstraint).substr(1), &OutputConstraintInfos); const Expr *OutExpr = S.getOutputExpr(I); OutExpr = OutExpr->IgnoreParenNoopCasts(getContext()); @@ -2854,32 +2910,33 @@ void CodeGenFunction::HandleOutputConstraints(const AsmStmt &S, if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second) CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg); - AsmInfo.OutputConstraints.push_back(OutputConstraint); - LValue Dest = EmitLValue(OutExpr); - if (!AsmInfo.Constraints.empty()) - AsmInfo.Constraints += ','; + OutputConstraints.push_back(OutputConstraint); + LValue Dest = CGF.EmitLValue(OutExpr); + if (!Constraints.empty()) + Constraints += ','; // If this is a register output, then make the inline asm return it // by-value. If this is a memory result, return the value by-reference. QualType QTy = OutExpr->getType(); const bool IsScalarOrAggregate = - hasScalarEvaluationKind(QTy) || hasAggregateEvaluationKind(QTy); + CodeGenFunction::hasScalarEvaluationKind(QTy) || + CodeGenFunction::hasAggregateEvaluationKind(QTy); if (!Info.allowsMemory() && IsScalarOrAggregate) { - AsmInfo.Constraints += "=" + OutputConstraint; - AsmInfo.ResultRegQualTys.push_back(QTy); - AsmInfo.ResultRegDests.push_back(Dest); + Constraints += "=" + OutputConstraint; + ResultRegQualTys.push_back(QTy); + ResultRegDests.push_back(Dest); - AsmInfo.ResultBounds.emplace_back(Info.getOutputOperandBounds()); + ResultBounds.emplace_back(Info.getOutputOperandBounds()); - llvm::Type *Ty = ConvertTypeForMem(QTy); + llvm::Type *Ty = CGF.ConvertTypeForMem(QTy); const bool RequiresCast = Info.allowsRegister() && - (getTargetHooks().isScalarizableAsmOperand(*this, Ty) || + (getTargetHooks().isScalarizableAsmOperand(CGF, Ty) || Ty->isAggregateType()); - AsmInfo.ResultTruncRegTypes.push_back(Ty); - AsmInfo.ResultTypeRequiresCast.push_back(RequiresCast); + ResultTruncRegTypes.push_back(Ty); + ResultTypeRequiresCast.push_back(RequiresCast); if (RequiresCast) { if (unsigned Size = getContext().getTypeSize(QTy)) @@ -2888,7 +2945,7 @@ void CodeGenFunction::HandleOutputConstraints(const AsmStmt &S, CGM.Error(OutExpr->getExprLoc(), "output size should not be zero"); } - AsmInfo.ResultRegTypes.push_back(Ty); + ResultRegTypes.push_back(Ty); // If this output is tied to an input, and if the input is larger, then // we need to set the actual result type of the inline asm node to be the @@ -2896,8 +2953,7 @@ void CodeGenFunction::HandleOutputConstraints(const AsmStmt &S, if (Info.hasMatchingInput()) { unsigned InputNo; for (InputNo = 0; InputNo != S.getNumInputs(); ++InputNo) { - Ta... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/199377 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
