https://github.com/kosarev updated https://github.com/llvm/llvm-project/pull/147541
>From 33e0843a1b66383e88f8bee4d51ce382209eb4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haeh...@amd.com> Date: Mon, 7 Jul 2025 11:34:03 +0100 Subject: [PATCH 1/2] [AMDGPU][Clang] Support bfloat16 arithmetic. Co-authored-by: Ivan Kosarev <ivan.kosa...@amd.com> --- clang/lib/Basic/Targets/AMDGPU.h | 2 ++ clang/test/CodeGen/AMDGPU/full-bf16.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 clang/test/CodeGen/AMDGPU/full-bf16.c diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 509128f3cf070..006c2fe475a94 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -451,6 +451,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { CUMode = false; else if (F == "+image-insts") HasImage = true; + else if (F == "+gfx950-insts" || F == "+gfx1250-insts") + HasFullBFloat16 = true; bool IsOn = F.front() == '+'; StringRef Name = StringRef(F).drop_front(); if (!llvm::is_contained(TargetIDFeatures, Name)) diff --git a/clang/test/CodeGen/AMDGPU/full-bf16.c b/clang/test/CodeGen/AMDGPU/full-bf16.c new file mode 100644 index 0000000000000..7770d02c59df1 --- /dev/null +++ b/clang/test/CodeGen/AMDGPU/full-bf16.c @@ -0,0 +1,23 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: define dso_local bfloat @div( +// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[DIV:%.*]] = fdiv bfloat [[TMP0]], [[TMP1]] +// CHECK-NEXT: ret bfloat [[DIV]] +// +__bf16 div(__bf16 a, __bf16 b) { + return a / b; +} >From 4fedbbd3ea544d48d332adbf8555e7609b631ecd Mon Sep 17 00:00:00 2001 From: Ivan Kosarev <ivan.kosa...@amd.com> Date: Tue, 8 Jul 2025 16:25:01 +0100 Subject: [PATCH 2/2] Enable bf16 arithmetic for all subtargets. --- clang/lib/Basic/Targets/AMDGPU.h | 3 +-- clang/test/CodeGen/AMDGPU/full-bf16.c | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 006c2fe475a94..1358abb70f984 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -439,6 +439,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { // pre-defined macros. bool handleTargetFeatures(std::vector<std::string> &Features, DiagnosticsEngine &Diags) override { + HasFullBFloat16 = true; auto TargetIDFeatures = getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind)); for (const auto &F : Features) { @@ -451,8 +452,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { CUMode = false; else if (F == "+image-insts") HasImage = true; - else if (F == "+gfx950-insts" || F == "+gfx1250-insts") - HasFullBFloat16 = true; bool IsOn = F.front() == '+'; StringRef Name = StringRef(F).drop_front(); if (!llvm::is_contained(TargetIDFeatures, Name)) diff --git a/clang/test/CodeGen/AMDGPU/full-bf16.c b/clang/test/CodeGen/AMDGPU/full-bf16.c index 7770d02c59df1..d2ec34561cd8e 100644 --- a/clang/test/CodeGen/AMDGPU/full-bf16.c +++ b/clang/test/CodeGen/AMDGPU/full-bf16.c @@ -1,5 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu tahiti -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu fiji -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define dso_local bfloat @div( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits