llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Saiyedul Islam (saiislam) <details> <summary>Changes</summary> Also update LIT tests and docs. For more details, see https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata --- Patch is 1.95 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73000.diff 115 Files Affected: - (modified) clang/include/clang/Basic/TargetOptions.h (+1-1) - (modified) clang/include/clang/Driver/Options.td (+2-2) - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1) - (modified) clang/test/CodeGen/amdgpu-address-spaces.cpp (+1-1) - (modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1) - (modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2) - (modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5) - (modified) clang/test/Driver/hip-device-libs.hip (+8-8) - (modified) lld/test/ELF/emulation-amdgpu.s (+1-1) - (modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1) - (modified) llvm/docs/AMDGPUUsage.rst (+7-8) - (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+28-29) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+53-55) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+468-484) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1578-1623) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+31-32) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2013-2077) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+31-32) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+28-39) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+5-6) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+5-6) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+7-4) - (modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+74-35) - (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+46-20) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+8-7) - (modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+32-35) - (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+397-379) - (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1219-1461) - (modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+42-47) - (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+8-12) - (modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+144-160) - (modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45) - (modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+28-32) - (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20) - (modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+21-25) - (modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+17-19) - (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+19-12) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+550-605) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+330-363) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+330-363) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+550-605) - (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19) - (modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+55-55) - (modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+25-32) - (modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+124-112) - (modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+48-22) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+8-7) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+8-7) - (modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118) - (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+8-22) - (modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+48-44) - (modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+33-61) - (modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72) - (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24) - (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+10-23) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+51-51) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+160-170) - (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+25-36) - (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved.ll (+112-108) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (+4) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s (+4) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (+3) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (+3) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (+3) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (+1) - (modified) mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp (+1-1) - (modified) mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp (+1-1) - (modified) mlir/test/Target/LLVMIR/rocdl.mlir (+1-1) ``````````diff diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index ba3acd029587160..2cd4b4203543c28 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -88,7 +88,7 @@ class TargetOptions { COV_5 = 500, }; /// \brief Code object version for AMDGPU. - CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None; + CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_5; /// \brief Enumeration values for AMDGPU printf lowering scheme enum class AMDGPUPrintfKind { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index df12ba8fbcb296a..89a3775f79396c7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4708,12 +4708,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>, - HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, Values<"none,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_4", "COV_5"]>, - MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">; + MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 5d2cd1959b06925..09318c85a810261 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2402,7 +2402,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 4; // default + unsigned CodeObjVer = 5; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp index 0a808aa6cc75ed3..ae2c61439f4ca53 100644 --- a/clang/test/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234; // CHECK: @u = addrspace(5) global i32 undef, align 4 // CHECK: @aaa = addrspace(6) global i32 1000, align 4 // CHECK: @bbb = addrspace(999) global i32 1234, align 4 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index ff5deaf9ab850d2..3cb6632fc0b63d3 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V4 +// RUN: -o - %s | FileCheck %s -check-prefix=V5 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index 282e0a49b9aa10b..0c846e0936b58b1 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 80aa1ee0700628f..9c9ea521271b99b 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index e574b1f64c499bd..2cf1286e2b54e8e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 0bc9a54682d3e31..8d9e4e018b12e5a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -601,13 +601,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 +// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 6ac5778721ba5b7..9a8c1f98dab30c5 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -160,13 +160,13 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test default code object version with old device library without abi_version_400.bc -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// Test default code object version with old device library without abi_version_500.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // Test -mcode-object-version=4 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ @@ -187,12 +187,12 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=5 with old device library without abi_version_400.bc -// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=5 \ +// Test -mcode-object-version=4 with old device library without abi_version_400.bc +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=4 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s index 707f0aeb909efae..329fb1c69b16665 100644 --- a/lld/test/ELF/emulation-amdgpu.s +++ b/lld/test/ELF/emulation-amdgpu.s @@ -13,7 +13,7 @@ # CHECK-NEXT: DataEncoding: LittleEndian (0x1) # CHECK-NEXT: FileVersion: 1 # CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 2 +# CHECK-NEXT: ABIVersion: 3 # CHECK-NEXT: Unused: (00 00 00 00 00 00 00) # CHECK-NEXT: } # CHECK-NEXT: Type: Executable (0x2) diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a2f25cdd57d87b5..a70b678ac25141c 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 2 +; AMDHSA: ABIVersion: 3 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 3148d4bebb96b5f..b586f700302a5e5 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1469,12 +1469,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. This is the default code object - version if not specified. + ``-mcode-object-version=4``. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. + ``-mcode-object-version=5``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3900,6 +3900,10 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ +. warning:: + Code object V4 is not the default code object version emitted by this version + of LLVM. + Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3930,11 +3934,6 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ -.. warning:: - Code object V5 is not the default code object version emitted by this version - of LLVM. - - Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index fdc59281c50d0b3..26a861de652f469 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::desc("AMDHSA Code Object Version"), - llvm::cl::init(4)); + llvm::cl::init(5)); namespace { @@ -161,7 +161,7 @@ unsigned getCodeObjectVersion(const Module &M) { } // Default code object version. - return AMDHSA_COV4; + return AMDHSA_COV5; } unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll index 0df80d67e77157c..a8f5e8e853ab862 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -7,7 +7,7 @@ define amdgpu_kernel void @stack_write_fi() { ; CHECK-LABEL: stack_write_fi: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_add_u32 s0, s0, s17 +; CHECK-NEXT: s_add_u32 s0, s0, s15 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_mov_b32 s4, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index c4e383c3708b33a..4a593d26f809c8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -7,43 +7,42 @@ declare void @callee() define amdgpu_kernel void @call_debug_loc() { ; CHECK-LABEL: name: call_debug_loc ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13, debug-location !6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12, debug-location !6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9, debug-location !6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF debug-location !6 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !6 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !6 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !6 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY14]], [[COPY1]], implicit $exec, debug-location !6 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !6 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !6 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !6 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[COPY]], implicit $exec, debug-location !6 ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6 - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6 - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6 - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6 - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]], debug-location !6 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]], debug-location !6 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]], debug-location !6 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]], debug-location !6 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]], debug-location !6 + ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]], debug-location !6 + ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]], debug-location !6 + ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]], debug-location !6 + ; CHECK-NEXT: $s... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/73000 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits