Author: Fabian Ritter Date: 2025-02-19T10:11:48+01:00 New Revision: 029c8e783d17d55541b308ee6db5429d54cb5153
URL: https://github.com/llvm/llvm-project/commit/029c8e783d17d55541b308ee6db5429d54cb5153 DIFF: https://github.com/llvm/llvm-project/commit/029c8e783d17d55541b308ee6db5429d54cb5153.diff LOG: [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (#126762) gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR removes all occurrences of gfx940/gfx941 from clang that can be removed without changes in the llvm directory. The target-invalid-cpu-note/amdgcn.c test is not included here since it tests a list of targets that is defined in llvm/lib/TargetParser/TargetParser.cpp. For SWDEV-512631 Added: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx942.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx942-param.cl clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl Modified: clang/include/clang/Basic/Cuda.h clang/lib/Basic/Cuda.cpp clang/lib/Basic/Targets/NVPTX.cpp clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu clang/test/CodeGenOpenCL/amdgpu-features.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl clang/test/Driver/amdgpu-macros.cl clang/test/Driver/amdgpu-mcpu.cl clang/test/Driver/cuda-bad-arch.cu clang/test/Driver/hip-macros.hip clang/test/Misc/target-invalid-cpu-note/nvptx.c clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl Removed: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx940-param.cl clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl ################################################################################ diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index f33ba46233a7a..793cab1f4e84a 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -106,8 +106,6 @@ enum class OffloadArch { GFX90a, GFX90c, GFX9_4_GENERIC, - GFX940, - GFX941, GFX942, GFX950, GFX10_1_GENERIC, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 1bfec0b37c5ee..f45fb0eca3714 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -124,8 +124,6 @@ static const OffloadArchToStringMap arch_names[] = { GFX(90a), // gfx90a GFX(90c), // gfx90c {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, - GFX(940), // gfx940 - GFX(941), // gfx941 GFX(942), // gfx942 GFX(950), // gfx950 {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 7d13c1f145440..547cf3dfa2be7 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -211,8 +211,6 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX90a: case OffloadArch::GFX90c: case OffloadArch::GFX9_4_GENERIC: - case OffloadArch::GFX940: - case OffloadArch::GFX941: case OffloadArch::GFX942: case OffloadArch::GFX950: case OffloadArch::GFX10_1_GENERIC: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index c13928f61a748..826ec4da8ea28 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2302,8 +2302,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX90a: case OffloadArch::GFX90c: case OffloadArch::GFX9_4_GENERIC: - case OffloadArch::GFX940: - case OffloadArch::GFX941: case OffloadArch::GFX942: case OffloadArch::GFX950: case OffloadArch::GFX10_1_GENERIC: diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu index 47fa3967fe237..37fca614c3111 100644 --- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu +++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -11,7 +11,7 @@ // RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -target-cpu gfx940 -fnative-half-type \ +// RUN: -fcuda-is-device -target-cpu gfx942 -fnative-half-type \ // RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics \ // RUN: | FileCheck -check-prefix=UNSAFE %s diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 633f1dec5e370..d12dcead6fadf 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -29,8 +29,6 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s @@ -85,8 +83,6 @@ // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl index 6593a8de566f6..f300b05fe798a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s typedef float v2f __attribute__((ext_vector_type(2))); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl similarity index 98% rename from clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl rename to clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl index a2f14c652c828..789f6e07240d7 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s // REQUIRES: amdgpu-registered-target typedef unsigned int u32; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl index 521121f5e7e54..c91cf158948b9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl @@ -2,7 +2,7 @@ // RUN: -verify -o - %s // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \ // RUN: -verify -o - %s -// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \ +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx942 -emit-llvm \ // RUN: -verify -o - %s // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \ // RUN: -verify -o - %s diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl index 45d2fa18efd53..b3367202f824e 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl @@ -5,7 +5,7 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90c -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 00346baa6ff84..79083c3c5f0f9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -1,7 +1,7 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -DMFMA_GFX940_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -DMFMA_GFX942_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX942 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -DMFMA_GFX950_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX950 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -226,189 +226,189 @@ void test_mfma_f64_4x4x4f64(global double* out, double a, double b, double c) #endif // MFMA_GFX90A_TESTS -#if defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) -// CHECK-GFX940-LABEL: @test_mfma_i32_16x16x32_i8 -// CHECK-GFX940: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0) +#if defined(MFMA_GFX942_TESTS) || defined(MFMA_GFX950_TESTS) +// CHECK-GFX942-LABEL: @test_mfma_i32_16x16x32_i8 +// CHECK-GFX942: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0) void test_mfma_i32_16x16x32_i8(global v4i* out, long a, long b, v4i c) { *out = __builtin_amdgcn_mfma_i32_16x16x32_i8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_i32_32x32x16_i8 -// CHECK-GFX940: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 %a, i64 %b, <16 x i32> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_i32_32x32x16_i8 +// CHECK-GFX942: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 %a, i64 %b, <16 x i32> %c, i32 0, i32 0, i32 0) void test_mfma_i32_32x32x16_i8(global v16i* out, long a, long b, v16i c) { *out = __builtin_amdgcn_mfma_i32_32x32x16_i8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x8_xf32 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> %a, <2 x float> %b, <4 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x8_xf32 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> %a, <2 x float> %b, <4 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_16x16x8_xf32(global v4f* out, v2f a, v2f b, v4f c) { *out = __builtin_amdgcn_mfma_f32_16x16x8_xf32(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x4_xf32 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> %a, <2 x float> %b, <16 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x4_xf32 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> %a, <2 x float> %b, <16 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_32x32x4_xf32(global v16f* out, v2f a, v2f b, v16f c) { *out = __builtin_amdgcn_mfma_f32_32x32x4_xf32(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_bf8_bf8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_bf8_bf8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_16x16x32_bf8_bf8(global v4f* out, long a, long b, v4f c) { *out = __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_bf8_fp8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_bf8_fp8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_16x16x32_bf8_fp8(global v4f* out, long a, long b, v4f c) { *out = __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_fp8_bf8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_fp8_bf8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_16x16x32_fp8_bf8(global v4f* out, long a, long b, v4f c) { *out = __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_fp8_fp8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_fp8_fp8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_16x16x32_fp8_fp8(global v4f* out, long a, long b, v4f c) { *out = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_bf8_bf8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_bf8_bf8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_32x32x16_bf8_bf8(global v16f* out, long a, long b, v16f c) { *out = __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_bf8_fp8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_bf8_fp8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_32x32x16_bf8_fp8(global v16f* out, long a, long b, v16f c) { *out = __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_fp8_bf8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_fp8_bf8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_32x32x16_fp8_bf8(global v16f* out, long a, long b, v16f c) { *out = __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_fp8_fp8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_fp8_fp8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0) void test_mfma_f32_32x32x16_fp8_fp8(global v16f* out, long a, long b, v16f c) { *out = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(a, b, c, 0, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x32_f16 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x32_f16 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_16x16x32_f16(global v4f* out, v4h a, v8h b, v4f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_16x16x32_f16(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x16_f16 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x16_f16 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_32x32x16_f16(global v16f* out, v4h a, v8h b, v16f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_32x32x16_f16(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x32_bf16 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x32_bf16 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_16x16x32_bf16(global v4f* out, v4s a, v8s b, v4f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_16x16x32_bf16(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x16_bf16 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x16_bf16 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_32x32x16_bf16(global v16f* out, v4s a, v8s b, v16f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_32x32x16_bf16(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_i32_16x16x64_i8 -// CHECK-GFX940: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_i32_16x16x64_i8 +// CHECK-GFX942: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0) void test_smfmac_i32_16x16x64_i8(global v4i* out, v2i a, v4i b, v4i c, int idx) { *out = __builtin_amdgcn_smfmac_i32_16x16x64_i8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_i32_32x32x32_i8 -// CHECK-GFX940: call <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32> %a, <4 x i32> %b, <16 x i32> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_i32_32x32x32_i8 +// CHECK-GFX942: call <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32> %a, <4 x i32> %b, <16 x i32> %c, i32 %idx, i32 0, i32 0) void test_smfmac_i32_32x32x32_i8(global v16i* out, v2i a, v4i b, v16i c, int idx) { *out = __builtin_amdgcn_smfmac_i32_32x32x32_i8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_bf8_bf8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x64_bf8_bf8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_16x16x64_bf8_bf8(global v4f* out, v2i a, v4i b, v4f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_bf8_fp8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x64_bf8_fp8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_16x16x64_bf8_fp8(global v4f* out, v2i a, v4i b, v4f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_fp8_bf8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x64_fp8_bf8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_16x16x64_fp8_bf8(global v4f* out, v2i a, v4i b, v4f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_fp8_fp8 -// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x64_fp8_fp8 +// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_16x16x64_fp8_fp8(global v4f* out, v2i a, v4i b, v4f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_bf8_bf8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x32_bf8_bf8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_32x32x32_bf8_bf8(global v16f* out, v2i a, v4i b, v16f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_bf8_fp8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x32_bf8_fp8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_32x32x32_bf8_fp8(global v16f* out, v2i a, v4i b, v16f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_fp8_bf8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x32_fp8_bf8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_32x32x32_fp8_bf8(global v16f* out, v2i a, v4i b, v16f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8(a, b, c, idx, 0, 0); } -// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_fp8_fp8 -// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x32_fp8_fp8 +// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) void test_smfmac_f32_32x32x32_fp8_fp8(global v16f* out, v2i a, v4i b, v16f c, int idx) { *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a, b, c, idx, 0, 0); } -#endif // defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) +#endif // defined(MFMA_GFX942_TESTS) || defined(MFMA_GFX950_TESTS) #ifdef MFMA_GFX950_TESTS diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx942.cl similarity index 84% rename from clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl rename to clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx942.cl index 832d7df00db14..24d05fe3a8525 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx942.cl @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 \ +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx942 \ // RUN: %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 \ -// RUN: -S -o - %s | FileCheck -check-prefix=GFX940 %s +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx942 \ +// RUN: -S -o - %s | FileCheck -check-prefix=GFX942 %s // REQUIRES: amdgpu-registered-target @@ -12,8 +12,8 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_flat_add_f32 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} -// GFX940-LABEL: test_flat_add_f32 -// GFX940: flat_atomic_add_f32 +// GFX942-LABEL: test_flat_add_f32 +// GFX942: flat_atomic_add_f32 half2 test_flat_add_f32(__generic float *addr, float x) { return __builtin_amdgcn_flat_atomic_fadd_f32(addr, x); } @@ -21,8 +21,8 @@ half2 test_flat_add_f32(__generic float *addr, float x) { // CHECK-LABEL: test_flat_add_2f16 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} -// GFX940-LABEL: test_flat_add_2f16 -// GFX940: flat_atomic_pk_add_f16 +// GFX942-LABEL: test_flat_add_2f16 +// GFX942: flat_atomic_pk_add_f16 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { return __builtin_amdgcn_flat_atomic_fadd_v2f16(addr, x); } @@ -32,8 +32,8 @@ half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> -// GFX940-LABEL: test_flat_add_2bf16 -// GFX940: flat_atomic_pk_add_bf16 +// GFX942-LABEL: test_flat_add_2bf16 +// GFX942: flat_atomic_pk_add_bf16 short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { return __builtin_amdgcn_flat_atomic_fadd_v2bf16(addr, x); } @@ -43,8 +43,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> -// GFX940-LABEL: test_global_add_2bf16 -// GFX940: global_atomic_pk_add_bf16 +// GFX942-LABEL: test_global_add_2bf16 +// GFX942: global_atomic_pk_add_bf16 short2 test_global_add_2bf16(__global short2 *addr, short2 x) { return __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x); } @@ -55,24 +55,24 @@ short2 test_global_add_2bf16(__global short2 *addr, short2 x) { // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4{{$}} // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> -// GFX940-LABEL: test_local_add_2bf16 -// GFX940: ds_pk_add_rtn_bf16 +// GFX942-LABEL: test_local_add_2bf16 +// GFX942: ds_pk_add_rtn_bf16 short2 test_local_add_2bf16(__local short2 *addr, short2 x) { return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x); } // CHECK-LABEL: test_local_add_2f16 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4 -// GFX940-LABEL: test_local_add_2f16 -// GFX940: ds_pk_add_rtn_f16 +// GFX942-LABEL: test_local_add_2f16 +// GFX942: ds_pk_add_rtn_f16 half2 test_local_add_2f16(__local half2 *addr, half2 x) { return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x); } // CHECK-LABEL: test_local_add_2f16_noret // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4 -// GFX940-LABEL: test_local_add_2f16_noret -// GFX940: ds_pk_add_f16 +// GFX942-LABEL: test_local_add_2f16_noret +// GFX942: ds_pk_add_f16 void test_local_add_2f16_noret(__local half2 *addr, half2 x) { __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x); } diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index d97b2ddb1fc66..35dc190761ca4 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -107,8 +107,6 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx909 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index 7c34d3ec6c63a..ad5fd8ebaa6a6 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -92,8 +92,6 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefix=GFX909 %s // RUN: %clang -### -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A %s // RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s -// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s -// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s // RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s // RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s @@ -148,8 +146,6 @@ // GFX909: "-target-cpu" "gfx909" // GFX90A: "-target-cpu" "gfx90a" // GFX90C: "-target-cpu" "gfx90c" -// GFX940: "-target-cpu" "gfx940" -// GFX941: "-target-cpu" "gfx941" // GFX942: "-target-cpu" "gfx942" // GFX950: "-target-cpu" "gfx950" // GFX1010: "-target-cpu" "gfx1010" diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu index 8c8c5c3401329..85231a5b9705a 100644 --- a/clang/test/Driver/cuda-bad-arch.cu +++ b/clang/test/Driver/cuda-bad-arch.cu @@ -23,7 +23,7 @@ // RUN: | FileCheck -check-prefix OK %s // RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx90a -c %s 2>&1 \ // RUN: | FileCheck -check-prefix OK %s -// RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx940 -c %s 2>&1 \ +// RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx942 -c %s 2>&1 \ // RUN: | FileCheck -check-prefix OK %s // We don't allow using NVPTX/AMDGCN for host compilation. diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip index 3b3afba0b18ca..bd93f9985a774 100644 --- a/clang/test/Driver/hip-macros.hip +++ b/clang/test/Driver/hip-macros.hip @@ -49,15 +49,13 @@ // RUN: %s 2>&1 | FileCheck --check-prefixes=IMAGE,NOWARN %s // RUN: %clang -E -dM --offload-arch=gfx1100 --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=IMAGE,NOWARN %s -// RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \ -// RUN: %s 2>&1 | FileCheck --check-prefixes=NOIMAGE,NOWARN %s -// RUN: %clang -E -dM --offload-arch=gfx941 --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %clang -E -dM --offload-arch=gfx942 --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=NOIMAGE,NOWARN %s // RUN: %clang -E -dM --offload-arch=gfx942 --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=NOIMAGE,NOWARN %s // RUN: %clang -E -dM --offload-arch=gfx1100 --cuda-device-only -nogpuinc -nogpulib \ // RUN: -Xclang -target-feature -Xclang "-image-insts" %s 2>&1 | FileCheck --check-prefixes=IMAGE,WARN %s -// RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %clang -E -dM --offload-arch=gfx942 --cuda-device-only -nogpuinc -nogpulib \ // RUN: -Xclang -target-feature -Xclang "+image-insts" %s 2>&1 | FileCheck --check-prefixes=NOIMAGE,WARN %s // NOWARN-NOT: warning // WARN: warning: feature flag '{{[+|-]}}image-insts' is ignored since the feature is read only [-Winvalid-command-line-argument] @@ -68,9 +66,9 @@ // RUN: %clang -E -dM --offload-arch=gfx1100 -nogpuinc -nogpulib \ // RUN: -fgpu-default-stream=per-thread %s 2>&1 | FileCheck --check-prefixes=PTS %s -// RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %clang -E -dM --offload-arch=gfx942 --cuda-device-only -nogpuinc -nogpulib \ // RUN: -fgpu-default-stream=legacy %s 2>&1 | FileCheck --check-prefixes=NOPTS %s -// RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %clang -E -dM --offload-arch=gfx942 --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=NOPTS %s // PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1 // PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1 diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c index 3afcdf8c9fe5c..06ef72878340f 100644 --- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c +++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c @@ -52,8 +52,6 @@ // CHECK-SAME: {{^}}, gfx90a // CHECK-SAME: {{^}}, gfx90c // CHECK-SAME: {{^}}, gfx9-4-generic -// CHECK-SAME: {{^}}, gfx940 -// CHECK-SAME: {{^}}, gfx941 // CHECK-SAME: {{^}}, gfx942 // CHECK-SAME: {{^}}, gfx950 // CHECK-SAME: {{^}}, gfx10-1-generic diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx940-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx942-param.cl similarity index 99% rename from clang/test/SemaOpenCL/builtins-amdgcn-error-gfx940-param.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-error-gfx942-param.cl index 0fc2304d51ce0..db0387e9878f2 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx940-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx942-param.cl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx940 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify -S -o - %s // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -verify -S -o - %s typedef float v2f __attribute__((ext_vector_type(2))); diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index d1c134c604dfc..b40b1c841b453 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx940 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify -S -o - %s typedef float float4 __attribute__((ext_vector_type(4))); typedef float float16 __attribute__((ext_vector_type(16))); diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl similarity index 81% rename from clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl index 7cf80f7c92677..0b3f692f33998 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify=gfx940,expected -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -S -verify=gfx942,expected -o - %s // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s // REQUIRES: amdgpu-registered-target @@ -8,12 +8,12 @@ void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}} __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}} __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx942-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx942-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} } __attribute__((target("gfx950-insts"))) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits