https://github.com/saiislam updated https://github.com/llvm/llvm-project/pull/79039
>From 384a90e5f161e4647a6ab803906a93f730c5df4b Mon Sep 17 00:00:00 2001 From: Saiyedul Islam <saiyedul.is...@amd.com> Date: Mon, 22 Jan 2024 13:11:22 -0600 Subject: [PATCH 1/2] [AMDGPU] Change default AMDHSA Code Object version to 5 Also update LIT tests, docs, and release notes for Clang and LLVM. For more details, see https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Driver/Options.td | 4 ++-- clang/test/CodeGen/amdgpu-address-spaces.cpp | 2 +- .../CodeGenCUDA/amdgpu-code-object-version.cu | 2 +- clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu | 4 ++-- clang/test/CodeGenHIP/default-attributes.hip | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 4 ++-- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 10 +++++----- flang/test/Driver/driver-help-hidden.f90 | 2 +- flang/test/Driver/driver-help.f90 | 4 ++-- llvm/docs/AMDGPUUsage.rst | 15 +++++++-------- llvm/docs/ReleaseNotes.rst | 2 ++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- .../Dialect/GPU/Transforms/SerializeToHsaco.cpp | 2 +- .../Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp | 1 + mlir/test/Target/LLVMIR/rocdl.mlir | 2 +- 16 files changed, 34 insertions(+), 29 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5846503af3acdfe..069dfcd22e3b667 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1104,6 +1104,9 @@ AMDGPU Support arguments in C ABI. Callee is responsible for allocating stack memory and copying the value of the struct if modified. Note that AMDGPU backend still supports byval for struct arguments. +- The default value for ``-mcode-object-version`` is now 5. + See `AMDHSA Code Object V5 Metadata <https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata>`_ + for more details. X86 Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f9e883e3e22de86..d4b82b301f12e64 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4777,12 +4777,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>, - HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>, Values<"none,4,5">, NormalizedValuesScope<"llvm::CodeObjectVersionKind">, NormalizedValues<["COV_None", "COV_4", "COV_5"]>, - MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">; + MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp index 0a808aa6cc75ed3..ae2c61439f4ca53 100644 --- a/clang/test/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234; // CHECK: @u = addrspace(5) global i32 undef, align 4 // CHECK: @aaa = addrspace(6) global i32 1000, align 4 // CHECK: @bbb = addrspace(999) global i32 1234, align 4 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index ff5deaf9ab850d2..3cb6632fc0b63d3 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V4 +// RUN: -o - %s | FileCheck %s -check-prefix=V5 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index 282e0a49b9aa10b..0c846e0936b58b1 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 80aa1ee0700628f..9c9ea521271b99b 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index e574b1f64c499bd..2cf1286e2b54e8e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 0bc9a54682d3e31..8d9e4e018b12e5a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -601,13 +601,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 +// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index 426b0e5a1c367d7..25dfcf3c70d8e1a 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -117,7 +117,7 @@ ! CHECK-NEXT: -L <dir> Add directory to library search path ! CHECK-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help' ! CHECK-NEXT: -mcode-object-version=<value> -! CHECK-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only) +! CHECK-NEXT: Specify code object ABI version. Defaults to 5. (AMDGPU only) ! CHECK-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help' ! CHECK-NEXT: -mllvm=<arg> Alias for -mllvm ! CHECK-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 221da6439764b4d..9f0aae51c3f5da6 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -103,7 +103,7 @@ ! HELP-NEXT: -L <dir> Add directory to library search path ! HELP-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help' ! HELP-NEXT: -mcode-object-version=<value> -! HELP-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only) +! HELP-NEXT: Specify code object ABI version. Defaults to 5. (AMDGPU only) ! HELP-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help' ! HELP-NEXT: -mllvm=<arg> Alias for -mllvm ! HELP-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing @@ -240,7 +240,7 @@ ! HELP-FC1-NEXT: -I <dir> Add directory to the end of the list of include search paths ! HELP-FC1-NEXT: -load <dsopath> Load the named plugin (dynamic shared object) ! HELP-FC1-NEXT: -mcode-object-version=<value> -! HELP-FC1-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only) +! HELP-FC1-NEXT: Specify code object ABI version. Defaults to 5. (AMDGPU only) ! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities. ! HELP-FC1-NEXT: -menable-no-nans Allow optimization to assume there are no NaNs. ! HELP-FC1-NEXT: -mframe-pointer=<value> Specify which frame pointers to retain. diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 548d677afdecb8f..6b2417143ca06c9 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1510,12 +1510,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. This is the default code object - version if not specified. + ``-mcode-object-version=4``. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. + ``-mcode-object-version=5``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3949,6 +3949,10 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ +. warning:: + Code object V4 is not the default code object version emitted by this version + of LLVM. + Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3979,11 +3983,6 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ -.. warning:: - Code object V5 is not the default code object version emitted by this version - of LLVM. - - Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index c17c834c8081b8a..471b43462a65015 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -115,6 +115,8 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding <int_get_rounding>` +* The default :ref:`AMDHSA code object version <amdgpu-amdhsa-code-object-metadata-v5>` is now 5. + Changes to the ARM Backend -------------------------- diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index f1c05446bf60690..0bf9452d822e970 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -33,7 +33,7 @@ static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion( "amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::init(llvm::AMDGPU::AMDHSA_COV4), + llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)")); diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp index 5cce7befce5283b..eee7a680f5b3bf9 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -264,7 +264,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { // This constant must always match the default code object ABI version // of the AMDGPU backend. - addControlConstant("__oclc_ABI_version", 400, 32); + addControlConstant("__oclc_ABI_version", 500, 32); } // Determine libraries we need to link - order matters due to dependencies diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp index cbce23fd580e755..a230ead7c188314 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp @@ -99,6 +99,7 @@ class ROCDLDialectLLVMIRTranslationInterface if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) { llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256"); } + llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "256"); } // Override flat-work-group-size // TODO: update clients to rocdl.flat_work_group_size instead, diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 3c9c70711ae2304..f831d7bba864c8f 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -489,7 +489,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 { llvm.return %source5 : i32 } -// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" } +// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="256" } // CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128" // CHECK-DAG: ![[$RANGE]] = !{i32 0, i32 64} >From ce8289e2f22ef158155f821b1114e9539cb33779 Mon Sep 17 00:00:00 2001 From: Saiyedul Islam <saiyedul.is...@amd.com> Date: Mon, 22 Jan 2024 13:19:51 -0600 Subject: [PATCH 2/2] [AMDGPU] Update llvm-objdump lit tests for COV5 Depends on #79038 which makes cov5 as the default code object version. --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s | 4 ++++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s | 4 ++++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s | 2 ++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s | 3 +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 3 +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 3 +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s | 1 + 7 files changed, 20 insertions(+) diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s index 58b01031afe383e..781729d5c4cc1a4 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s @@ -48,6 +48,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 1 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -101,6 +102,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -154,6 +156,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -207,6 +210,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s index 2133002908d9fc8..019c20754f389d7 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s @@ -49,6 +49,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 1 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -103,6 +104,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -157,6 +159,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -211,6 +214,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s index e1d312d6035cb71..86af4810059ecd0 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s @@ -46,6 +46,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 1 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -97,6 +98,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s index d26189451829f71..4978f6974fd33ce 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s @@ -45,6 +45,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 @@ -95,6 +96,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 @@ -145,6 +147,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 2 ; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1 ; CHECK-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s index 1f6f134cd67eff4..a40cf1d37769321 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -44,6 +44,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 @@ -95,6 +96,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 @@ -146,6 +148,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s index 4d385a1c885780b..b6b9c91b1424627 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -43,6 +43,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 23 @@ -90,6 +91,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 14 @@ -137,6 +139,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 ; CHECK-NEXT: .end_amdhsa_kernel .amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s index 39cef4da4278df2..39739c957350cb2 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -62,6 +62,7 @@ ; OBJDUMP-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 ; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; OBJDUMP-NEXT: .amdhsa_wavefront_size32 0 +; OBJDUMP-NEXT: .amdhsa_uses_dynamic_stack 0 ; OBJDUMP-NEXT: .end_amdhsa_kernel .amdhsa_kernel my_kernel _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits