https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116309
>From 74ed0a510ff829e5e98d9edf0284ee4decfa4bc0 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap <pravin.jag...@amd.com> Date: Wed, 13 Dec 2023 00:27:03 -0500 Subject: [PATCH 1/2] AMDGPU: Increase the LDS size to support to 160 KB for gfx950 --- llvm/docs/AMDGPUUsage.rst | 2 + llvm/lib/Target/AMDGPU/AMDGPU.td | 3 +- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 12 +++-- llvm/lib/Target/AMDGPU/AMDGPUFeatures.td | 1 + .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 + llvm/test/CodeGen/AMDGPU/extra-lds-size.ll | 7 +++ .../AMDGPU/lds-limit-diagnostics-gfx950.ll | 13 +++++ .../CodeGen/AMDGPU/lds-size-hsa-gfx950.ll | 31 +++++++++++ .../CodeGen/AMDGPU/lds-size-pal-gfx950.ll | 26 ++++++++++ .../tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s | 52 +++++++++++++++++++ 10 files changed, 144 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lds-size-pal-gfx950.ll create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index b85b680b9c82d3..a25b6feddbeddc 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -5475,6 +5475,8 @@ The fields used by CP for code objects before V3 also match those specified in roundup(lds-size / (64 * 4)) GFX7-GFX11 roundup(lds-size / (128 * 4)) + GFX950 + roundup(lds-size / (320 * 4)) 24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution _INVALID_OPERATION with specified exceptions diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 35dbf86b7c6f36..a05d4a644d08d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1494,7 +1494,8 @@ def FeatureISAVersion9_5_Common : FeatureSet< [FeatureFP8Insts, FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, - FeatureGFX950Insts + FeatureGFX950Insts, + 
FeatureAddressableLocalMemorySize163840 ])>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index d801f2b1591275..90ece275412c7c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1172,12 +1172,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.DX10Clamp = Mode.DX10Clamp; unsigned LDSAlignShift; - if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { - // LDS is allocated in 64 dword blocks. - LDSAlignShift = 8; - } else { + if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) { + // LDS is allocated in 320 dword blocks. + LDSAlignShift = 11; + } else if (STM.getFeatureBits().test( + FeatureAddressableLocalMemorySize65536)) { // LDS is allocated in 128 dword blocks. LDSAlignShift = 9; + } else { + // LDS is allocated in 64 dword blocks. + LDSAlignShift = 8; } ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td index f832a2a55d6229..74d1faeb6f545b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td @@ -29,6 +29,7 @@ class SubtargetFeatureAddressableLocalMemorySize <int Value> : SubtargetFeature< def FeatureAddressableLocalMemorySize32768 : SubtargetFeatureAddressableLocalMemorySize<32768>; def FeatureAddressableLocalMemorySize65536 : SubtargetFeatureAddressableLocalMemorySize<65536>; +def FeatureAddressableLocalMemorySize163840 : SubtargetFeatureAddressableLocalMemorySize<163840>; class SubtargetFeatureWavefrontSize <int ValueLog2> : SubtargetFeature< "wavefrontsize"#!shl(1, ValueLog2), diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 01866fbd9da6e7..501d00b1f308d9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -916,6 +916,8 @@ unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { return 32768; if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536)) return 65536; + if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) + return 163840; return 0; } diff --git a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll index 13640b74a7937b..318ecd16a2ccb3 100644 --- a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll @@ -2,6 +2,8 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-MESA %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-PAL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950-PAL %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950-MESA %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200-PAL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200-MESA %s @@ -17,6 +19,11 @@ ; GFX11-MESA: .long 45100 ; GFX11-MESA-NEXT: .long 1024 +; GFX950-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x200 + +; GFX950-MESA: .long 45100 +; GFX950-MESA-NEXT: .long 512 + ; GFX1200-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x400 ; GFX1200-MESA: .long 45100 diff --git a/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll new file mode 100644 index 00000000000000..19166b271db775 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll @@ -0,0 +1,13 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; GFX950 supports upto 
160 KB LDS memory. +; This is a negative test to check when the LDS size exceeds the max usable limit. + +; ERROR: error: <unknown>:0:0: local memory (163844) exceeds limit (163840) in function 'test_lds_limit' +@dst = addrspace(3) global [40961 x i32] poison + +define amdgpu_kernel void @test_lds_limit(i32 %val) { + %gep = getelementptr [40961 x i32], ptr addrspace(3) @dst, i32 0, i32 100 + store i32 %val, ptr addrspace(3) %gep + ret void +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx950.ll b/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx950.ll new file mode 100644 index 00000000000000..6ebfc9a5e9d4f6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx950.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=MESA %s + +; gfx950 supports up to 160 KB configurable LDS memory. +; This test checks the max and above the old i.e. 128 KiB size of LDS that can be allocated. 
+ +@lds.i32 = addrspace(3) global i32 poison +@lds.array.size.131076 = addrspace(3) global [32768 x i32] poison +@lds.array.size.163840 = addrspace(3) global [40959 x i32] poison + +; GCN-LABEL: test_lds_array_size_131076: +; GCN: .amdhsa_group_segment_fixed_size 131076 +; GCN: ; LDSByteSize: 131076 bytes/workgroup +; MESA: granulated_lds_size = 65 +define amdgpu_kernel void @test_lds_array_size_131076() { + %gep = getelementptr inbounds [32768 x i32], ptr addrspace(3) @lds.array.size.131076, i32 0, i32 20 + %val = load i32, ptr addrspace(3) %gep + store i32 %val, ptr addrspace(3) @lds.i32 + ret void +} + +; GCN-LABEL: test_lds_array_size_163840: +; GCN: .amdhsa_group_segment_fixed_size 163840 +; GCN: ; LDSByteSize: 163840 bytes/workgroup +; MESA: granulated_lds_size = 80 +define amdgpu_kernel void @test_lds_array_size_163840() { + %gep = getelementptr inbounds [40959 x i32], ptr addrspace(3) @lds.array.size.163840 , i32 0, i32 20 + %val = load i32, ptr addrspace(3) %gep + store i32 %val, ptr addrspace(3) @lds.i32 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lds-size-pal-gfx950.ll b/llvm/test/CodeGen/AMDGPU/lds-size-pal-gfx950.ll new file mode 100644 index 00000000000000..22cad8ab5f5360 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-size-pal-gfx950.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=PAL %s + +; GFX950 supports up to 160 KB configurable LDS memory. +; This test checks the min and max size of LDS that can be allocated. 
+ +; PAL: .shader_functions: +; PAL: test_lds_array_i32: +; PAL: .lds_size: 0x28000 +; PAL: test_lds_i32: +; PAL: .lds_size: 0x4 + + +@lds.i32 = addrspace(3) global i32 poison +@lds.array.i32 = addrspace(3) global [40959 x i32] poison + +define amdgpu_gfx void @test_lds_i32(i32 %val) { + store i32 %val, ptr addrspace(3) @lds.i32 + ret void +} + +define amdgpu_gfx void @test_lds_array_i32() { + %gep = getelementptr inbounds [40959 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20 + %val = load i32, ptr addrspace(3) %gep + store i32 %val, ptr addrspace(3) @lds.i32 + ret void +} \ No newline at end of file diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s new file mode 100644 index 00000000000000..5b9d42c7fad553 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s @@ -0,0 +1,52 @@ +;; Test disassembly for gfx950 kernel descriptor. + +; RUN: rm -rf %t && split-file %s %t && cd %t + +;--- 1.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx950 < 1.s > 1.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx950 < 1-disasm.s > 1-disasm.o +; FIxMe: cmp 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 163840 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_accum_offset 4 +; CHECK-NEXT: .amdhsa_tg_split 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 8 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 
1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_enable_private_segment 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; CHECK-NEXT:.end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_group_segment_fixed_size 163840 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel >From 6ab31c00bb87ef8488c29e452c47cfd7af8aaac9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Fri, 15 Nov 2024 10:51:15 -0800 Subject: [PATCH 2/2] Avoid adding multiple addressablelocalmemorysizes to gfx950 Also expand the limit testing. 
--- llvm/lib/Target/AMDGPU/AMDGPU.td | 16 +++++++--- .../AMDGPU/lds-limit-diagnostics-gfx950.ll | 13 -------- .../CodeGen/AMDGPU/lds-limit-diagnostics.ll | 32 +++++++++++++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index a05d4a644d08d1..e84fdf54866cdd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1192,7 +1192,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", "gfx9", - [FeatureFP64, FeatureAddressableLocalMemorySize65536, + [FeatureFP64, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, @@ -1358,6 +1358,7 @@ def FeatureISAVersion8_1_0 : FeatureSet< def FeatureISAVersion9_0_Common : FeatureSet< [FeatureGFX9, + FeatureAddressableLocalMemorySize65536, FeatureLDSBankCount32, FeatureImageInsts, FeatureMadMacF32Insts]>; @@ -1375,7 +1376,8 @@ def FeatureISAVersion9_Generic : FeatureSet< def FeatureISAVersion9_0_MI_Common : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureFmaMixInsts, + [FeatureAddressableLocalMemorySize65536, + FeatureFmaMixInsts, FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts, @@ -1491,16 +1493,17 @@ def FeatureISAVersion9_4_Common : FeatureSet< def FeatureISAVersion9_5_Common : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [FeatureFP8Insts, + [FeatureAddressableLocalMemorySize163840, + FeatureFP8Insts, FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, FeatureGFX950Insts, - FeatureAddressableLocalMemorySize163840 ])>; def FeatureISAVersion9_4_0 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [ + 
FeatureAddressableLocalMemorySize65536, FeatureForceStoreSC0SC1, FeatureFP8Insts, FeatureFP8ConversionInsts, @@ -1511,6 +1514,7 @@ def FeatureISAVersion9_4_0 : FeatureSet< def FeatureISAVersion9_4_1 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [ + FeatureAddressableLocalMemorySize65536, FeatureForceStoreSC0SC1, FeatureFP8Insts, FeatureFP8ConversionInsts, @@ -1521,6 +1525,7 @@ def FeatureISAVersion9_4_1 : FeatureSet< def FeatureISAVersion9_4_2 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [ + FeatureAddressableLocalMemorySize65536, FeatureFP8Insts, FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, @@ -1529,7 +1534,8 @@ def FeatureISAVersion9_4_2 : FeatureSet< def FeatureISAVersion9_4_Generic : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [FeatureRequiresCOV6])>; + [FeatureAddressableLocalMemorySize65536, + FeatureRequiresCOV6])>; def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>; diff --git a/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll deleted file mode 100644 index 19166b271db775..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s - -; GFX950 supports upto 160 KB LDS memory. -; This is a negative test to check when the LDS size exceeds the max usable limit. 
- -; ERROR: error: <unknown>:0:0: local memory (163844) exceeds limit (163840) in function 'test_lds_limit' -@dst = addrspace(3) global [40961 x i32] poison - -define amdgpu_kernel void @test_lds_limit(i32 %val) { - %gep = getelementptr [40961 x i32], ptr addrspace(3) @dst, i32 0, i32 100 - store i32 %val, ptr addrspace(3) %gep - ret void -} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll new file mode 100644 index 00000000000000..73f6dcb3a2a1d2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll @@ -0,0 +1,32 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT160K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s 
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s +; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT32K %s + +; gfx950 supports up to 160 KB LDS memory. The generic target does not. +; This is a negative test to check when the LDS size exceeds the max usable limit. + +; ERROR-LIMIT160K: error: <unknown>:0:0: local memory (163844) exceeds limit (163840) in function 'test_lds_limit' +; ERROR-LIMIT64K: error: <unknown>:0:0: local memory (163844) exceeds limit (65536) in function 'test_lds_limit' +; ERROR-LIMIT32K: error: <unknown>:0:0: local memory (163844) exceeds limit (32768) in function 'test_lds_limit' +@dst = addrspace(3) global [40961 x i32] poison + +define amdgpu_kernel void @test_lds_limit(i32 %val) { + %gep = getelementptr [40961 x i32], ptr addrspace(3) @dst, i32 0, i32 100 + store i32 %val, ptr addrspace(3) %gep + ret void +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits