https://github.com/E00N777 created https://github.com/llvm/llvm-project/pull/185852
Part of #185382. Lower `__builtin_neon_vduph_lane_bf16` and `__builtin_neon_vduph_laneq_bf16` in ClangIR to `cir.vec.extract`, and add dedicated AArch64 Neon BF16 tests. This is my first LLVM PR, so I'd really appreciate any suggestions on the implementation, test structure, or general LLVM contribution style. From a992a797b28ca8b5445252955fc1584ac003f0f2 Mon Sep 17 00:00:00 2001 From: E0N777 <[email protected]> Date: Wed, 11 Mar 2026 18:20:45 +0800 Subject: [PATCH] [CIR][AArch64] Lower BF16 vduph lane builtins --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 12 ++++++-- clang/test/CodeGen/AArch64/neon/bf16-vduph.c | 30 +++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/neon/bf16-vduph.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 5534e69b5f8bc..564d3e47a8c24 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2802,10 +2802,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vqdmlsls_lane_s32: case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: case NEON::BI__builtin_neon_vget_lane_bf16: - case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_lane_f16: case NEON::BI__builtin_neon_vgetq_lane_bf16: - case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vduph_laneq_f16: case NEON::BI__builtin_neon_vcvt_bf16_f32: case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: @@ -2824,6 +2822,16 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } + switch (builtinID) { + default: + break; + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: { + uint64_t index = getZExtIntValueFromConstOp(ops[1]); + return builder.createExtractElement(loc, ops[0], index); + } + } + cir::VectorType ty = 
getNeonType(this, type, loc); if (!ty) return nullptr; diff --git a/clang/test/CodeGen/AArch64/neon/bf16-vduph.c b/clang/test/CodeGen/AArch64/neon/bf16-vduph.c new file mode 100644 index 0000000000000..e38383f567d98 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/bf16-vduph.c @@ -0,0 +1,30 @@ +// REQUIRES: aarch64-registered-target || arm-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} + +typedef __bf16 bfloat16_t; +typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; +typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; + +// LLVM-LABEL: @test_vduph_lane_bf16( +// LLVM-SAME: <4 x bfloat> {{.*}} [[V:%.*]]) +// LLVM: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V]], i{{32|64}} 1 +// LLVM: ret bfloat [[VGET_LANE]] +// CIR-LABEL: @test_vduph_lane_bf16( +// CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !cir.bf16> +bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { + return __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_lane_bf16(v, 1)); +} + +// LLVM-LABEL: @test_vduph_laneq_bf16( +// LLVM-SAME: <8 x bfloat> {{.*}} [[V:%.*]]) +// LLVM: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V]], i{{32|64}} 7 +// LLVM: ret bfloat [[VGETQ_LANE]] +// CIR-LABEL: @test_vduph_laneq_bf16( +// CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : 
!cir.vector<8 x !cir.bf16> +bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { + return __builtin_bit_cast(bfloat16_t, + __builtin_neon_vduph_laneq_bf16(v, 7)); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
