https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/179084
>From e903b680c9668919ba6c67b8a21bce88c56adb93 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 01:18:16 -0400 Subject: [PATCH 1/4] [CIR][AMDGPU] Lower Language specific address spaces and implement AMDGPU target --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 10 +- clang/lib/CIR/CodeGen/TargetInfo.cpp | 46 ++++ clang/lib/CIR/CodeGen/TargetInfo.h | 3 + .../CIR/Dialect/Transforms/TargetLowering.cpp | 253 +++++++++++++++++- .../Transforms/TargetLowering/CMakeLists.txt | 1 + .../Transforms/TargetLowering/LowerModule.cpp | 11 +- .../TargetLowering/TargetLoweringInfo.h | 10 + .../TargetLowering/Targets/AMDGPU.cpp | 47 ++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 5 +- .../CIR/CodeGen/amdgpu-address-spaces.cpp | 51 ++++ .../CIR/Lowering/global-address-space.cir | 57 +++- 11 files changed, 476 insertions(+), 18 deletions(-) create mode 100644 clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp create mode 100644 clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 2e16998b04a7b..4c48541850e24 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -260,6 +260,9 @@ const TargetCIRGenInfo &CIRGenModule::getTargetCIRGenInfo() { case llvm::Triple::nvptx64: theTargetCIRGenInfo = createNVPTXTargetCIRGenInfo(genTypes); return *theTargetCIRGenInfo; + case llvm::Triple::amdgcn: { + return *(theTargetCIRGenInfo = createAMDGPUTargetCIRGenInfo(genTypes)); + } } } @@ -727,6 +730,9 @@ cir::GlobalOp CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, LangAS langAS, const VarDecl *d, ForDefinition_t isForDefinition) { + + mlir::ptr::MemorySpaceAttrInterface cirAS = + cir::toCIRAddressSpaceAttr(getMLIRContext(), langAS); // Lookup the entry, lazily creating it if necessary. cir::GlobalOp entry; if (mlir::Operation *v = getGlobalValue(mangledName)) { @@ -736,13 +742,13 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, } if (entry) { + mlir::ptr::MemorySpaceAttrInterface entryCIRAS = entry.getAddrSpaceAttr(); assert(!cir::MissingFeatures::opGlobalWeakRef()); assert(!cir::MissingFeatures::setDLLStorageClass()); assert(!cir::MissingFeatures::openMP()); - if (entry.getSymType() == ty && - (cir::isMatchingAddressSpace(entry.getAddrSpaceAttr(), langAS))) + if (entry.getSymType() == ty && entryCIRAS == cirAS) return entry; // If there are two attempts to define the same mangled name, issue an diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 6570976e0dfeb..e593322643a13 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -54,7 +54,48 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo { X8664TargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique<X8664ABIInfo>(cgt)) {} }; +class AMDGPUABIInfo : public ABIInfo { +public: + AMDGPUABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} +}; + +class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { +public: + AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) + : TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {} + + clang::LangAS + getGlobalVarAddressSpace(CIRGenModule &cgm, + const clang::VarDecl *decl) const override { + using clang::LangAS; + assert(!cgm.getLangOpts().OpenCL && + !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS defaultGlobalAS = LangAS::opencl_global; + if (!decl) + return defaultGlobalAS; + + LangAS addrSpace = decl->getType().getAddressSpace(); + if (addrSpace != LangAS::Default) + return addrSpace; + + // Only promote to address space 4 if VarDecl has constant initialization. + if (decl->getType().isConstantStorage(cgm.getASTContext(), false, false) && + decl->hasConstantInitialization()) { + if (auto constAS = cgm.getTarget().getConstantAddressSpace()) + return *constAS; + } + + return defaultGlobalAS; + } + mlir::ptr::MemorySpaceAttrInterface + getCIRAllocaAddressSpace() const override { + return cir::LangAddressSpaceAttr::get( + &getABIInfo().cgt.getMLIRContext(), + cir::LangAddressSpace::OffloadPrivate); + } +}; } // namespace namespace { @@ -76,6 +117,11 @@ clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<NVPTXTargetCIRGenInfo>(cgt); } +std::unique_ptr<TargetCIRGenInfo> +clang::CIRGen::createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) { + return std::make_unique<AMDGPUTargetCIRGenInfo>(cgt); +} + std::unique_ptr<TargetCIRGenInfo> clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<X8664TargetCIRGenInfo>(cgt); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 161325c8668e8..df24767918469 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -123,6 +123,9 @@ class TargetCIRGenInfo { } }; +std::unique_ptr<TargetCIRGenInfo> +createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt); + std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt); std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt); diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp index c3ed588cf06dc..5249107376e67 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp @@ -11,10 +11,15 @@ //===----------------------------------------------------------------------===// #include "TargetLowering/LowerModule.h" +#include "TargetLowering/TargetLoweringInfo.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Dialect/Passes.h" -#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace cir; @@ -32,6 +37,157 @@ struct TargetLoweringPass void runOnOperation() override; }; +/// A generic target lowering pattern that matches any CIR op whose operand or +/// result types need address space conversion. Clones the op with converted +/// types. +class CIRGenericTargetLoweringPattern : public mlir::ConversionPattern { +public: + CIRGenericTargetLoweringPattern(mlir::MLIRContext *context, + const mlir::TypeConverter &typeConverter) + : mlir::ConversionPattern(typeConverter, MatchAnyOpTypeTag(), + /*benefit=*/1, context) {} + + mlir::LogicalResult + matchAndRewrite(mlir::Operation *op, llvm::ArrayRef<mlir::Value> operands, + mlir::ConversionPatternRewriter &rewriter) const override { + // Do not match on operations that have dedicated lowering patterns. + if (llvm::isa<cir::FuncOp, cir::GlobalOp>(op)) + return mlir::failure(); + + const mlir::TypeConverter *typeConverter = getTypeConverter(); + assert(typeConverter && + "CIRGenericTargetLoweringPattern requires a type converter"); + bool operandsAndResultsLegal = typeConverter->isLegal(op); + bool regionsLegal = + std::all_of(op->getRegions().begin(), op->getRegions().end(), + [typeConverter](mlir::Region ®ion) { + return typeConverter->isLegal(®ion); + }); + if (operandsAndResultsLegal && regionsLegal) + return mlir::failure(); + + assert(op->getNumRegions() == 0 && + "CIRGenericTargetLoweringPattern cannot " + "deal with operations with regions"); + + mlir::OperationState loweredOpState(op->getLoc(), op->getName()); + loweredOpState.addOperands(operands); + loweredOpState.addAttributes(op->getAttrs()); + loweredOpState.addSuccessors(op->getSuccessors()); + + llvm::SmallVector<mlir::Type> loweredResultTypes; + loweredResultTypes.reserve(op->getNumResults()); + for (mlir::Type result : op->getResultTypes()) + loweredResultTypes.push_back(typeConverter->convertType(result)); + loweredOpState.addTypes(loweredResultTypes); + + for (mlir::Region ®ion : op->getRegions()) { + mlir::Region *loweredRegion = loweredOpState.addRegion(); + rewriter.inlineRegionBefore(region, *loweredRegion, loweredRegion->end()); + if (mlir::failed( + rewriter.convertRegionTypes(loweredRegion, *getTypeConverter()))) + return mlir::failure(); + } + + mlir::Operation *loweredOp = rewriter.create(loweredOpState); + rewriter.replaceOp(op, loweredOp); + return mlir::success(); + } +}; + +/// Pattern to lower GlobalOp address space attributes. GlobalOp carries +/// addr_space as a standalone attribute (not inside a type), so the +/// TypeConverter won't reach it automatically. +class CIRGlobalOpTargetLowering + : public mlir::OpConversionPattern<cir::GlobalOp> { + const cir::TargetLoweringInfo &targetInfo; + +public: + CIRGlobalOpTargetLowering(mlir::MLIRContext *context, + const mlir::TypeConverter &typeConverter, + const cir::TargetLoweringInfo &targetInfo) + : mlir::OpConversionPattern<cir::GlobalOp>(typeConverter, context, + /*benefit=*/1), + targetInfo(targetInfo) {} + + mlir::LogicalResult + matchAndRewrite(cir::GlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type loweredSymTy = getTypeConverter()->convertType(op.getSymType()); + if (!loweredSymTy) + return mlir::failure(); + + // Convert the addr_space attribute. + mlir::ptr::MemorySpaceAttrInterface addrSpace = op.getAddrSpaceAttr(); + if (auto langAS = mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( + addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(op.getContext(), + targetAS); + } + + // Only rewrite if something actually changed. + if (loweredSymTy == op.getSymType() && addrSpace == op.getAddrSpaceAttr()) + return mlir::failure(); + + auto newOp = mlir::cast<cir::GlobalOp>(rewriter.clone(*op.getOperation())); + newOp.setSymType(loweredSymTy); + newOp.setAddrSpaceAttr(addrSpace); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } +}; + +/// Pattern to lower FuncOp types that contain address spaces. +class CIRFuncOpTargetLowering + : public mlir::OpConversionPattern<cir::FuncOp> { +public: + using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::FuncOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + cir::FuncType opFuncType = op.getFunctionType(); + mlir::TypeConverter::SignatureConversion signatureConversion( + opFuncType.getNumInputs()); + + for (const auto &[i, argType] : llvm::enumerate(opFuncType.getInputs())) { + mlir::Type loweredArgType = getTypeConverter()->convertType(argType); + if (!loweredArgType) + return mlir::failure(); + signatureConversion.addInputs(i, loweredArgType); + } + + mlir::Type loweredReturnType = + getTypeConverter()->convertType(opFuncType.getReturnType()); + if (!loweredReturnType) + return mlir::failure(); + + auto loweredFuncType = cir::FuncType::get( + signatureConversion.getConvertedTypes(), loweredReturnType, + /*isVarArg=*/opFuncType.getVarArg()); + + // Nothing changed, skip. + if (loweredFuncType == opFuncType) + return mlir::failure(); + + cir::FuncOp loweredFuncOp = rewriter.cloneWithoutRegions(op); + loweredFuncOp.setFunctionType(loweredFuncType); + rewriter.inlineRegionBefore(op.getBody(), loweredFuncOp.getBody(), + loweredFuncOp.end()); + if (mlir::failed(rewriter.convertRegionTypes( + &loweredFuncOp.getBody(), *getTypeConverter(), + &signatureConversion))) + return mlir::failure(); + + rewriter.eraseOp(op); + return mlir::success(); + } +}; + } // namespace static void convertSyncScopeIfPresent(mlir::Operation *op, @@ -47,6 +203,82 @@ static void convertSyncScopeIfPresent(mlir::Operation *op, } } +/// Prepare the type converter for the target lowering pass. +/// Converts LangAddressSpaceAttr → TargetAddressSpaceAttr inside pointer types. +static void +prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, + const cir::TargetLoweringInfo &targetInfo) { + converter.addConversion([](mlir::Type type) { return type; }); + + converter.addConversion( + [&converter, &targetInfo](cir::PointerType type) -> mlir::Type { + mlir::Type pointee = converter.convertType(type.getPointee()); + if (!pointee) + return {}; + auto addrSpace = type.getAddrSpace(); + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( + addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(type.getContext(), + targetAS); + } + return cir::PointerType::get(type.getContext(), pointee, addrSpace); + }); + + converter.addConversion([&converter](cir::ArrayType type) -> mlir::Type { + mlir::Type loweredElementType = + converter.convertType(type.getElementType()); + if (!loweredElementType) + return {}; + return cir::ArrayType::get(loweredElementType, type.getSize()); + }); + + converter.addConversion([&converter](cir::FuncType type) -> mlir::Type { + llvm::SmallVector<mlir::Type> loweredInputTypes; + loweredInputTypes.reserve(type.getNumInputs()); + if (mlir::failed( + converter.convertTypes(type.getInputs(), loweredInputTypes))) + return {}; + + mlir::Type loweredReturnType = converter.convertType(type.getReturnType()); + if (!loweredReturnType) + return {}; + + return cir::FuncType::get(loweredInputTypes, loweredReturnType, + /*isVarArg=*/type.getVarArg()); + }); +} + +static void populateTargetLoweringConversionTarget( + mlir::ConversionTarget &target, const mlir::TypeConverter &tc) { + target.addLegalOp<mlir::ModuleOp>(); + + target.addDynamicallyLegalDialect<cir::CIRDialect>( + [&tc](mlir::Operation *op) { + if (!tc.isLegal(op)) + return false; + return std::all_of(op->getRegions().begin(), op->getRegions().end(), + [&tc](mlir::Region ®ion) { + return tc.isLegal(®ion); + }); + }); + + target.addDynamicallyLegalOp<cir::FuncOp>( + [&tc](cir::FuncOp op) { return tc.isLegal(op.getFunctionType()); }); + + target.addDynamicallyLegalOp<cir::GlobalOp>([&tc](cir::GlobalOp op) { + if (!tc.isLegal(op.getSymType())) + return false; + return !mlir::isa_and_present<cir::LangAddressSpaceAttr>( + op.getAddrSpaceAttr()); + }); +} + void TargetLoweringPass::runOnOperation() { auto mod = mlir::cast<mlir::ModuleOp>(getOperation()); std::unique_ptr<cir::LowerModule> lowerModule = cir::createLowerModule(mod); @@ -57,11 +289,30 @@ void TargetLoweringPass::runOnOperation() { return; } + const auto &targetInfo = lowerModule->getTargetLoweringInfo(); + mod->walk([&](mlir::Operation *op) { if (mlir::isa<cir::LoadOp, cir::StoreOp, cir::AtomicXchgOp, cir::AtomicCmpXchgOp, cir::AtomicFetchOp>(op)) convertSyncScopeIfPresent(op, *lowerModule); }); + + // Address space conversion: LangAddressSpaceAttr → TargetAddressSpaceAttr. + mlir::TypeConverter typeConverter; + prepareTargetLoweringTypeConverter(typeConverter, targetInfo); + + mlir::RewritePatternSet patterns(mod.getContext()); + patterns.add<CIRGlobalOpTargetLowering>(mod.getContext(), typeConverter, + targetInfo); + patterns.add<CIRFuncOpTargetLowering>(typeConverter, mod.getContext()); + patterns.add<CIRGenericTargetLoweringPattern>(mod.getContext(), + typeConverter); + + mlir::ConversionTarget target(*mod.getContext()); + populateTargetLoweringConversionTarget(target, typeConverter); + + if (failed(mlir::applyPartialConversion(mod, target, std::move(patterns)))) + signalPassFailure(); } std::unique_ptr<Pass> mlir::createTargetLoweringPass() { diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt index 92148127424e9..07e3a67f97859 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt @@ -3,6 +3,7 @@ add_clang_library(MLIRCIRTargetLowering LowerModule.cpp LowerItaniumCXXABI.cpp TargetLoweringInfo.cpp + Targets/AMDGPU.cpp DEPENDS clangBasic diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp index f2398e3105578..26e63b3b676ae 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp @@ -45,8 +45,15 @@ static std::unique_ptr<CIRCXXABI> createCXXABI(LowerModule &lm) { static std::unique_ptr<TargetLoweringInfo> createTargetLoweringInfo(LowerModule &lm) { - assert(!cir::MissingFeatures::targetLoweringInfo()); - return std::make_unique<TargetLoweringInfo>(); + const llvm::Triple &triple = lm.getTarget().getTriple(); + + switch (triple.getArch()) { + case llvm::Triple::amdgcn: + return createAMDGPUTargetLoweringInfo(); + default: + assert(!cir::MissingFeatures::targetLoweringInfo()); + return std::make_unique<TargetLoweringInfo>(); + } } LowerModule::LowerModule(clang::LangOptions langOpts, diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h index 760c3b0b7cc5e..a307bcb373dec 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h @@ -15,6 +15,8 @@ #define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETLOWERINGINFO_H #include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include <memory> +#include <string> namespace cir { @@ -24,8 +26,16 @@ class TargetLoweringInfo { virtual cir::SyncScopeKind convertSyncScope(cir::SyncScopeKind syncScope) const; + + virtual unsigned + getTargetAddrSpaceFromCIRAddrSpace(cir::LangAddressSpace addrSpace) const { + return 0; + }; }; +// Target-specific factory functions. +std::unique_ptr<TargetLoweringInfo> createAMDGPUTargetLoweringInfo(); + } // namespace cir #endif diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp new file mode 100644 index 0000000000000..058c1200531e5 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -0,0 +1,47 @@ +//===- AMDGPU.cpp - Emit CIR for AMDGPU -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../TargetLoweringInfo.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "llvm/Support/ErrorHandling.h" + +namespace cir { + +namespace { + +class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { +public: + // Address space mapping from: + // https://llvm.org/docs/AMDGPUUsage.html#address-spaces + unsigned getTargetAddrSpaceFromCIRAddrSpace( + cir::LangAddressSpace addrSpace) const override { + switch (addrSpace) { + case cir::LangAddressSpace::Default: + return 0; + case cir::LangAddressSpace::OffloadPrivate: + return 5; + case cir::LangAddressSpace::OffloadLocal: + return 3; + case cir::LangAddressSpace::OffloadGlobal: + return 1; + case cir::LangAddressSpace::OffloadConstant: + return 4; + case cir::LangAddressSpace::OffloadGeneric: + return 0; + } + llvm_unreachable("Unknown CIR address space for AMDGPU target"); + } +}; + +} // namespace + +std::unique_ptr<TargetLoweringInfo> createAMDGPUTargetLoweringInfo() { + return std::make_unique<AMDGPUTargetLoweringInfo>(); +} + +} // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index b5a181c198993..edeea4bc6a115 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3265,10 +3265,7 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, mlir::ptr::MemorySpaceAttrInterface addrSpaceAttr = type.getAddrSpace(); unsigned numericAS = 0; - if (auto langAsAttr = - mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpaceAttr)) - llvm_unreachable("lowering LangAddressSpaceAttr NYI"); - else if (auto targetAsAttr = + if (auto targetAsAttr = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( addrSpaceAttr)) numericAS = targetAsAttr.getValue(); diff --git a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp new file mode 100644 index 0000000000000..35ceed46189dc --- /dev/null +++ b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// Test address space handling for AMDGPU target in C++ mode (non-OpenCL/HIP). +// This exercises getGlobalVarAddressSpace. + +// Test default address space for globals without explicit AS. +// For AMDGPU in non-OpenCL/HIP mode, globals default to AS 1 (global). +int globalVar = 123; + +// CIR-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// LLVM-DAG: @globalVar = addrspace(1) global i32 123, align 4 +// OGCG-DAG: @globalVar = addrspace(1) global i32 123, align 4 + +// Test non-const global array goes to global AS. +int globalArray[4] = {1, 2, 3, 4}; + +// CIR-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// LLVM-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 +// OGCG-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 + +// Test static global goes to global AS. +static int staticGlobal = 555; + +// CIR-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// LLVM-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 +// OGCG-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 + +// Test constant initialization promotion to AS 4 (constant). +// Use extern to force emission since const globals are otherwise optimized away. +extern const int constGlobal = 456; + +// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 +// OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 + +// Test extern const array goes to constant AS. +extern const int constArray[3] = {10, 20, 30}; + +// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 +// OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 + +// Use the static variable to ensure it's emitted. +int getStaticGlobal() { return staticGlobal; } diff --git a/clang/test/CIR/Lowering/global-address-space.cir b/clang/test/CIR/Lowering/global-address-space.cir index c9f25e1126098..7161d6852acb2 100644 --- a/clang/test/CIR/Lowering/global-address-space.cir +++ b/clang/test/CIR/Lowering/global-address-space.cir @@ -3,12 +3,13 @@ !s32i = !cir.int<s, 32> -module { - cir.global external target_address_space(1) @global_as1 = #cir.int<42> : !s32i - // CHECK: llvm.mlir.global external @global_as1(42 : i32) {addr_space = 1 : i32} : i32 +module attributes { cir.triple = "amdgcn-amd-amdhsa" } { + // Target address space lowering (passthrough) + cir.global external target_address_space(1) @global_target_as1 = #cir.int<42> : !s32i + // CHECK: llvm.mlir.global external @global_target_as1(42 : i32) {addr_space = 1 : i32} : i32 - cir.global external target_address_space(3) @global_as3 = #cir.int<100> : !s32i - // CHECK: llvm.mlir.global external @global_as3(100 : i32) {addr_space = 3 : i32} : i32 + cir.global external target_address_space(3) @global_target_as3 = #cir.int<100> : !s32i + // CHECK: llvm.mlir.global external @global_target_as3(100 : i32) {addr_space = 3 : i32} : i32 cir.global external @global_default = #cir.int<0> : !s32i // CHECK: llvm.mlir.global external @global_default(0 : i32) {addr_space = 0 : i32} : i32 @@ -16,20 +17,20 @@ module { // Test cir.get_global with address space produces correct llvm.mlir.addressof type // CHECK-LABEL: llvm.func @test_get_global_as1 cir.func @test_get_global_as1() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_as1 : !llvm.ptr<1> + // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as1 : !llvm.ptr<1> // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<1> -> i32 // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_as1 : !cir.ptr<!s32i, target_address_space(1)> + %0 = cir.get_global @global_target_as1 : !cir.ptr<!s32i, target_address_space(1)> %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(1)>, !s32i cir.return %1 : !s32i } // CHECK-LABEL: llvm.func @test_get_global_as3 cir.func @test_get_global_as3() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_as3 : !llvm.ptr<3> + // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as3 : !llvm.ptr<3> // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<3> -> i32 // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_as3 : !cir.ptr<!s32i, target_address_space(3)> + %0 = cir.get_global @global_target_as3 : !cir.ptr<!s32i, target_address_space(3)> %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(3)>, !s32i cir.return %1 : !s32i } @@ -43,4 +44,42 @@ module { %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i cir.return %1 : !s32i } + + // Language address space lowering (AMDGPU mapping) + // See: https://llvm.org/docs/AMDGPUUsage.html#address-spaces + // OffloadGlobal -> 1 + cir.global external lang_address_space(offload_global) @global_lang_global = #cir.int<1> : !s32i + // CHECK: llvm.mlir.global external @global_lang_global(1 : i32) {addr_space = 1 : i32} : i32 + + // OffloadLocal -> 3 + cir.global "private" internal lang_address_space(offload_local) @global_lang_local : !s32i + // CHECK: llvm.mlir.global internal @global_lang_local() {addr_space = 3 : i32} : i32 + + // OffloadConstant -> 4 + cir.global external lang_address_space(offload_constant) @global_lang_constant = #cir.int<2> : !s32i + // CHECK: llvm.mlir.global external @global_lang_constant(2 : i32) {addr_space = 4 : i32} : i32 + + // OffloadPrivate -> 5 + cir.global "private" internal lang_address_space(offload_private) @global_lang_private : !s32i + // CHECK: llvm.mlir.global internal @global_lang_private() {addr_space = 5 : i32} : i32 + + // OffloadGeneric -> 0 + cir.global external lang_address_space(offload_generic) @global_lang_generic = #cir.int<3> : !s32i + // CHECK: llvm.mlir.global external @global_lang_generic(3 : i32) {addr_space = 0 : i32} : i32 + + // Pointer type lowering with lang_address_space + // CHECK: llvm.func @test_ptr_lang_as(%arg0: !llvm.ptr<1>) + cir.func @test_ptr_lang_as(%arg0: !cir.ptr<!s32i, lang_address_space(offload_global)>) { + // The alloca stores a pointer to address space 1, but the alloca itself is on the stack (default AS) + // CHECK: llvm.alloca {{.*}} x !llvm.ptr<1> {{.*}} : (i64) -> !llvm.ptr + %0 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>>, ["arg", init] {alignment = 8 : i64} + cir.return + } + + // CHECK: llvm.func @test_ptr_target_as(%arg0: !llvm.ptr<5>) + cir.func @test_ptr_target_as(%arg0: !cir.ptr<!s32i, target_address_space(5)>) { + // CHECK: llvm.alloca {{.*}} x !llvm.ptr<5> {{.*}} : (i64) -> !llvm.ptr + %0 = cir.alloca !cir.ptr<!s32i, target_address_space(5)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(5)>>, ["arg", init] {alignment = 8 : i64} + cir.return + } } >From d2f82825a366d75d25393b69a2b25405125c9dcb Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 04:36:14 -0400 Subject: [PATCH 2/4] handle formatting --- .../CIR/Dialect/Transforms/TargetLowering.cpp | 80 +++++++++---------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp index 5249107376e67..0c1fcbe8f3ee5 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp @@ -66,9 +66,8 @@ class CIRGenericTargetLoweringPattern : public mlir::ConversionPattern { if (operandsAndResultsLegal && regionsLegal) return mlir::failure(); - assert(op->getNumRegions() == 0 && - "CIRGenericTargetLoweringPattern cannot " - "deal with operations with regions"); + assert(op->getNumRegions() == 0 && "CIRGenericTargetLoweringPattern cannot " + "deal with operations with regions"); mlir::OperationState loweredOpState(op->getLoc(), op->getName()); loweredOpState.addOperands(operands); @@ -104,10 +103,10 @@ class CIRGlobalOpTargetLowering public: CIRGlobalOpTargetLowering(mlir::MLIRContext *context, - const mlir::TypeConverter &typeConverter, - const cir::TargetLoweringInfo &targetInfo) + const mlir::TypeConverter &typeConverter, + const cir::TargetLoweringInfo &targetInfo) : mlir::OpConversionPattern<cir::GlobalOp>(typeConverter, context, - /*benefit=*/1), + /*benefit=*/1), targetInfo(targetInfo) {} mlir::LogicalResult @@ -119,14 +118,14 @@ class CIRGlobalOpTargetLowering // Convert the addr_space attribute. mlir::ptr::MemorySpaceAttrInterface addrSpace = op.getAddrSpaceAttr(); - if (auto langAS = mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( - addrSpace)) { + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpace)) { unsigned targetAS = targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); - addrSpace = targetAS == 0 - ? nullptr - : cir::TargetAddressSpaceAttr::get(op.getContext(), - targetAS); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(op.getContext(), targetAS); } // Only rewrite if something actually changed. @@ -142,8 +141,7 @@ class CIRGlobalOpTargetLowering }; /// Pattern to lower FuncOp types that contain address spaces. -class CIRFuncOpTargetLowering - : public mlir::OpConversionPattern<cir::FuncOp> { +class CIRFuncOpTargetLowering : public mlir::OpConversionPattern<cir::FuncOp> { public: using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern; @@ -178,9 +176,9 @@ class CIRFuncOpTargetLowering loweredFuncOp.setFunctionType(loweredFuncType); rewriter.inlineRegionBefore(op.getBody(), loweredFuncOp.getBody(), loweredFuncOp.end()); - if (mlir::failed(rewriter.convertRegionTypes( - &loweredFuncOp.getBody(), *getTypeConverter(), - &signatureConversion))) + if (mlir::failed(rewriter.convertRegionTypes(&loweredFuncOp.getBody(), + *getTypeConverter(), + &signatureConversion))) return mlir::failure(); rewriter.eraseOp(op); @@ -210,25 +208,23 @@ prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, const cir::TargetLoweringInfo &targetInfo) { converter.addConversion([](mlir::Type type) { return type; }); - converter.addConversion( - [&converter, &targetInfo](cir::PointerType type) -> mlir::Type { - mlir::Type pointee = converter.convertType(type.getPointee()); - if (!pointee) - return {}; - auto addrSpace = type.getAddrSpace(); - if (auto langAS = - mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( - addrSpace)) { - unsigned targetAS = - targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); - addrSpace = - targetAS == 0 - ? nullptr - : cir::TargetAddressSpaceAttr::get(type.getContext(), - targetAS); - } - return cir::PointerType::get(type.getContext(), pointee, addrSpace); - }); + converter.addConversion([&converter, + &targetInfo](cir::PointerType type) -> mlir::Type { + mlir::Type pointee = converter.convertType(type.getPointee()); + if (!pointee) + return {}; + auto addrSpace = type.getAddrSpace(); + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(type.getContext(), targetAS); + } + return cir::PointerType::get(type.getContext(), pointee, addrSpace); + }); converter.addConversion([&converter](cir::ArrayType type) -> mlir::Type { mlir::Type loweredElementType = @@ -254,18 +250,18 @@ prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, }); } -static void populateTargetLoweringConversionTarget( - mlir::ConversionTarget &target, const mlir::TypeConverter &tc) { +static void +populateTargetLoweringConversionTarget(mlir::ConversionTarget &target, + const mlir::TypeConverter &tc) { target.addLegalOp<mlir::ModuleOp>(); target.addDynamicallyLegalDialect<cir::CIRDialect>( [&tc](mlir::Operation *op) { if (!tc.isLegal(op)) return false; - return std::all_of(op->getRegions().begin(), op->getRegions().end(), - [&tc](mlir::Region ®ion) { - return tc.isLegal(®ion); - }); + return std::all_of( + op->getRegions().begin(), op->getRegions().end(), + [&tc](mlir::Region ®ion) { return tc.isLegal(®ion); }); }); target.addDynamicallyLegalOp<cir::FuncOp>( >From 52851d818b03a7d8bbd1edf1851652d4be879dd4 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 04:39:26 -0400 Subject: [PATCH 3/4] fix tests to represent pre-target lowering state of AS --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 5 +- .../CIR/CodeGen/amdgpu-address-spaces.cpp | 20 +++-- clang/test/CIR/CodeGenCUDA/address-spaces.cu | 19 +++-- .../CIR/Lowering/global-address-space.cir | 85 ------------------- 4 files changed, 26 insertions(+), 103 deletions(-) delete mode 100644 clang/test/CIR/Lowering/global-address-space.cir diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 4c48541850e24..b06d108d15f02 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -731,8 +731,6 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, LangAS langAS, const VarDecl *d, ForDefinition_t isForDefinition) { - mlir::ptr::MemorySpaceAttrInterface cirAS = - cir::toCIRAddressSpaceAttr(getMLIRContext(), langAS); // Lookup the entry, lazily creating it if necessary. cir::GlobalOp entry; if (mlir::Operation *v = getGlobalValue(mangledName)) { @@ -748,7 +746,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, assert(!cir::MissingFeatures::setDLLStorageClass()); assert(!cir::MissingFeatures::openMP()); - if (entry.getSymType() == ty && entryCIRAS == cirAS) + if (entry.getSymType() == ty && + cir::isMatchingAddressSpace(entryCIRAS, langAS)) return entry; // If there are two attempts to define the same mangled name, issue an diff --git a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp index 35ceed46189dc..bee81138471c5 100644 --- a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir +// RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s + // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll @@ -14,21 +17,24 @@ // For AMDGPU in non-OpenCL/HIP mode, globals default to AS 1 (global). int globalVar = 123; -// CIR-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// CIR-PRE-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// CIR-DAG: cir.global external target_address_space(1) @globalVar = #cir.int<123> : !s32i // LLVM-DAG: @globalVar = addrspace(1) global i32 123, align 4 // OGCG-DAG: @globalVar = addrspace(1) global i32 123, align 4 // Test non-const global array goes to global AS. int globalArray[4] = {1, 2, 3, 4}; -// CIR-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// CIR-PRE-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// CIR-DAG: cir.global external target_address_space(1) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> // LLVM-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 // OGCG-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 // Test static global goes to global AS. static int staticGlobal = 555; -// CIR-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// CIR-PRE-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// CIR-DAG: cir.global "private" internal{{.*}}target_address_space(1) @_ZL12staticGlobal = #cir.int<555> : !s32i // LLVM-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 // OGCG-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 @@ -36,14 +42,16 @@ static int staticGlobal = 555; // Use extern to force emission since const globals are otherwise optimized away. extern const int constGlobal = 456; -// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// CIR-PRE-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i // LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 // OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 // Test extern const array goes to constant AS. extern const int constArray[3] = {10, 20, 30}; -// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// CIR-PRE-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> // LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 // OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu index 8f95c1600878d..0f78309cb096a 100644 --- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -1,30 +1,31 @@ // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ -// RUN: -fcuda-is-device -emit-cir %s -o %t.cir -// RUN: FileCheck --input-file=%t.cir %s +// RUN: -fcuda-is-device -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir +// RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s // Verifies CIR emits correct address spaces for CUDA globals. #include "Inputs/cuda.h" -// CHECK: cir.global external lang_address_space(offload_global) @i = #cir.int<0> : !s32i +// CIR-PRE: cir.global external lang_address_space(offload_global) @i = #cir.int<0> : !s32i __device__ int i; -// CHECK: cir.global constant external lang_address_space(offload_constant) @j = #cir.int<0> : !s32i +// CIR-PRE: cir.global constant external lang_address_space(offload_constant) @j = #cir.int<0> : !s32i __constant__ int j; -// CHECK: cir.global external lang_address_space(offload_local) @k = #cir.poison : !s32i +// CIR-PRE: cir.global external lang_address_space(offload_local) @k = #cir.poison : !s32i __shared__ int k; -// CHECK: cir.global external lang_address_space(offload_local) @b = #cir.poison : !cir.float +// CIR-PRE: cir.global external lang_address_space(offload_local) @b = #cir.poison : !cir.float __shared__ float b; __device__ void foo() { - // CHECK: cir.get_global @i : !cir.ptr<!s32i, lang_address_space(offload_global)> + // CIR-PRE: cir.get_global @i : !cir.ptr<!s32i, lang_address_space(offload_global)> i++; - // CHECK: cir.get_global @j : !cir.ptr<!s32i, lang_address_space(offload_constant)> + // CIR-PRE: cir.get_global @j : !cir.ptr<!s32i, lang_address_space(offload_constant)> j++; - // CHECK: cir.get_global @k : !cir.ptr<!s32i, lang_address_space(offload_local)> + // CIR-PRE: cir.get_global @k : !cir.ptr<!s32i, lang_address_space(offload_local)> k++; } diff --git a/clang/test/CIR/Lowering/global-address-space.cir b/clang/test/CIR/Lowering/global-address-space.cir deleted file mode 100644 index 7161d6852acb2..0000000000000 --- a/clang/test/CIR/Lowering/global-address-space.cir +++ /dev/null @@ -1,85 +0,0 @@ -// RUN: cir-opt %s -cir-to-llvm -o %t.mlir -// RUN: FileCheck --input-file=%t.mlir %s - -!s32i = !cir.int<s, 32> - -module attributes { cir.triple = "amdgcn-amd-amdhsa" } { - // Target address space lowering (passthrough) - cir.global external target_address_space(1) @global_target_as1 = #cir.int<42> : !s32i - // CHECK: llvm.mlir.global external @global_target_as1(42 : i32) {addr_space = 1 : i32} : i32 - - cir.global external target_address_space(3) @global_target_as3 = #cir.int<100> : !s32i - // CHECK: llvm.mlir.global external @global_target_as3(100 : i32) {addr_space = 3 : i32} : i32 - - cir.global external @global_default = #cir.int<0> : !s32i - // CHECK: llvm.mlir.global external @global_default(0 : i32) {addr_space = 0 : i32} : i32 - - // Test cir.get_global with address space produces correct llvm.mlir.addressof type - // CHECK-LABEL: llvm.func @test_get_global_as1 - cir.func @test_get_global_as1() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as1 : !llvm.ptr<1> - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<1> -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_target_as1 : !cir.ptr<!s32i, target_address_space(1)> - %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(1)>, !s32i - cir.return %1 : !s32i - } - - // CHECK-LABEL: llvm.func @test_get_global_as3 - cir.func @test_get_global_as3() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as3 : !llvm.ptr<3> - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<3> -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_target_as3 : !cir.ptr<!s32i, target_address_space(3)> - %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(3)>, !s32i - cir.return %1 : !s32i - } - - // CHECK-LABEL: llvm.func @test_get_global_default - cir.func @test_get_global_default() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_default : !llvm.ptr - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_default : !cir.ptr<!s32i> - %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i - cir.return %1 : !s32i - } - - // Language address space lowering (AMDGPU mapping) - // See: https://llvm.org/docs/AMDGPUUsage.html#address-spaces - // OffloadGlobal -> 1 - cir.global external lang_address_space(offload_global) @global_lang_global = #cir.int<1> : !s32i - // CHECK: llvm.mlir.global external @global_lang_global(1 : i32) {addr_space = 1 : i32} : i32 - - // OffloadLocal -> 3 - cir.global "private" internal lang_address_space(offload_local) @global_lang_local : !s32i - // CHECK: llvm.mlir.global internal @global_lang_local() {addr_space = 3 : i32} : i32 - - // OffloadConstant -> 4 - cir.global external lang_address_space(offload_constant) @global_lang_constant = #cir.int<2> : !s32i - // CHECK: llvm.mlir.global external @global_lang_constant(2 : i32) {addr_space = 4 : i32} : i32 - - // OffloadPrivate -> 5 - cir.global "private" internal lang_address_space(offload_private) @global_lang_private : !s32i - // CHECK: llvm.mlir.global internal @global_lang_private() {addr_space = 5 : i32} : i32 - - // OffloadGeneric -> 0 - cir.global external lang_address_space(offload_generic) @global_lang_generic = #cir.int<3> : !s32i - // CHECK: llvm.mlir.global external @global_lang_generic(3 : i32) {addr_space = 0 : i32} : i32 - - // Pointer type lowering with lang_address_space - // CHECK: llvm.func @test_ptr_lang_as(%arg0: !llvm.ptr<1>) - cir.func @test_ptr_lang_as(%arg0: !cir.ptr<!s32i, lang_address_space(offload_global)>) { - // The alloca stores a pointer to address space 1, but the alloca itself is on the stack (default AS) - // CHECK: llvm.alloca {{.*}} x !llvm.ptr<1> {{.*}} : (i64) -> !llvm.ptr - %0 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>>, ["arg", init] {alignment = 8 : i64} - cir.return - } - - // CHECK: llvm.func @test_ptr_target_as(%arg0: !llvm.ptr<5>) - cir.func @test_ptr_target_as(%arg0: !cir.ptr<!s32i, target_address_space(5)>) { - // CHECK: llvm.alloca {{.*}} x !llvm.ptr<5> {{.*}} : (i64) -> !llvm.ptr - %0 = cir.alloca !cir.ptr<!s32i, target_address_space(5)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(5)>>, ["arg", init] {alignment = 8 : i64} - cir.return - } -} >From a8c797b9a49d652df993831c27030c3071999b93 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 04:49:06 -0400 Subject: [PATCH 4/4] Use AMDGPU enums to map CIR AS --- .../Transforms/TargetLowering/Targets/AMDGPU.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp index 058c1200531e5..186b2af31bd0c 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -8,6 +8,7 @@ #include "../TargetLoweringInfo.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ErrorHandling.h" namespace cir { @@ -22,17 +23,17 @@ class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { cir::LangAddressSpace addrSpace) const override { switch (addrSpace) { case cir::LangAddressSpace::Default: - return 0; + return llvm::AMDGPUAS::FLAT_ADDRESS; case cir::LangAddressSpace::OffloadPrivate: - return 5; + return llvm::AMDGPUAS::PRIVATE_ADDRESS; case cir::LangAddressSpace::OffloadLocal: - return 3; + return llvm::AMDGPUAS::LOCAL_ADDRESS; case cir::LangAddressSpace::OffloadGlobal: - return 1; + return llvm::AMDGPUAS::GLOBAL_ADDRESS; case cir::LangAddressSpace::OffloadConstant: - return 4; + return llvm::AMDGPUAS::CONSTANT_ADDRESS; case cir::LangAddressSpace::OffloadGeneric: - return 0; + return llvm::AMDGPUAS::FLAT_ADDRESS; } llvm_unreachable("Unknown CIR address space for AMDGPU target"); } _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
