================ @@ -4227,8 +4227,106 @@ static const omp::GV &getGridValue(Function *Kernel) { llvm_unreachable("No grid value available for this architecture!"); } +static MDNode *getNVPTXMDNode(Function &Kernel, StringRef Name) { + Module &M = *Kernel.getParent(); + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + for (auto *Op : MD->operands()) { + if (Op->getNumOperands() != 3) + continue; + auto *KernelOp = dyn_cast<ConstantAsMetadata>(Op->getOperand(0)); + if (!KernelOp || KernelOp->getValue() != &Kernel) + continue; + auto *Prop = dyn_cast<MDString>(Op->getOperand(1)); + if (!Prop || Prop->getString() != Name) + continue; + return Op; + } + return nullptr; +} + +static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, + bool Min) { + // Update the "maxntidx" metadata for NVIDIA, or add it. + MDNode *ExistingOp = getNVPTXMDNode(Kernel, Name); + if (ExistingOp) { + auto *OldVal = dyn_cast<ConstantAsMetadata>(ExistingOp->getOperand(2)); + int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue(); + ExistingOp->replaceOperandWith( + 2, ConstantAsMetadata::get(ConstantInt::get( + OldVal->getValue()->getType(), + Min ? 
std::min(OldLimit, Value) : std::max(OldLimit, Value)))); + } else { + LLVMContext &Ctx = Kernel.getContext(); + Metadata *MDVals[] = {ConstantAsMetadata::get(&Kernel), + MDString::get(Ctx, Name), + ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), Value))}; + // Append metadata to nvvm.annotations + Module &M = *Kernel.getParent(); + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + MD->addOperand(MDNode::get(Ctx, MDVals)); + } +} + +std::pair<int32_t, int32_t> +OpenMPIRBuilder::readThreadBoundsForKernel(Function &Kernel) { + int32_t ThreadLimit = + Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit"); + + bool IsAMDGPU = Kernel.getCallingConv() == CallingConv::AMDGPU_KERNEL; + if (IsAMDGPU) { + const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size"); + if (!Attr.isValid() || !Attr.isStringAttribute()) + return {0, ThreadLimit}; + auto [LBStr, UBStr] = Attr.getValueAsString().split(','); + int32_t LB, UB; + if (!llvm::to_integer(UBStr, UB, 10)) + return {0, ThreadLimit}; + UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB; + if (!llvm::to_integer(LBStr, LB, 10)) + return {0, UB}; + return {LB, UB}; + } + + if (MDNode *ExistingOp = getNVPTXMDNode(Kernel, "maxntidx")) { + auto *OldVal = dyn_cast<ConstantAsMetadata>(ExistingOp->getOperand(2)); + int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue(); + return {0, ThreadLimit ? 
std::min(ThreadLimit, UB) : UB}; + } + return {0, ThreadLimit}; +} + +void OpenMPIRBuilder::writeThreadBoundsForKernel(Function &Kernel, int32_t LB, + int32_t UB) { + Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB)); + + bool IsAMDGPU = Kernel.getCallingConv() == CallingConv::AMDGPU_KERNEL; + if (IsAMDGPU) { + Kernel.addFnAttr("amdgpu-flat-work-group-size", + llvm::utostr(LB) + "," + llvm::utostr(UB)); + return; + } + + updateNVPTXMetadata(Kernel, "maxntidx", UB, true); +} + +std::pair<int32_t, int32_t> +OpenMPIRBuilder::readTeamBoundsForKernel(Function &Kernel) { + // TODO add A backend annotations ---------------- jhuber6 wrote:
Grammar: this comment should read "// TODO: Add backend annotations." https://github.com/llvm/llvm-project/pull/70247 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits