================
@@ -0,0 +1,28 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=LLVM
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,simplifycfg | FileCheck %s --check-prefixes=LLVM %}
----------------
banach-space wrote:
You might already know most of this, so apologies if I'm being too verbose.
The builtin that I am lowering here, `vnegh_f16`:
```cpp
#include <arm_fp16.h>
float16_t test_vnegh_f16(float16_t a) {
return vnegh_f16(a);
}
```
is in fact a macro which expands to:
```cpp
#define vnegh_f16(__p0) __extension__ ({ \
float16_t __ret; \
float16_t __s0 = __p0; \
__ret = __builtin_bit_cast(float16_t, __builtin_neon_vnegh_f16(__s0)); \
__ret; \
})
```
The corresponding AST is a bit complex ...
```bash
|-FunctionDecl 0xc89ce7397b48
</home/andwar02/work/llvm-project/clang/test/CodeGen/AArch64/neon/fullfp16.c:21:1,
line:28:1> line:21:11 test_vnegh_f16 'float16_t (float16_t)'
| |-ParmVarDecl 0xc89ce7397a30 <col:26, col:36> col:36 used a
'float16_t':'__fp16'
| `-CompoundStmt 0xc89ce73b8660 <col:39, line:28:1>
| `-ReturnStmt 0xc89ce73b8650 <line:27:3,
/home/andwar02/work/llvm-project/build/cir-release/lib/clang/23/include/arm_fp16.h:504:2>
| `-UnaryOperator 0xc89ce73b8638 <line:499:25, line:504:2>
'float16_t':'__fp16' prefix '__extension__' cannot overflow
| `-StmtExpr 0xc89ce73b8618 <line:499:39, line:504:2> 'float16_t':'__fp16'
| `-CompoundStmt 0xc89ce73b85e8 <line:499:40, line:503:10>
| |-DeclStmt 0xc89ce73b8188 <line:500:3, col:18>
| | `-VarDecl 0xc89ce73b8120 <col:3, col:13> col:13 used __ret
'float16_t':'__fp16'
| |-DeclStmt 0xc89ce73b8270 <line:501:3, col:24>
| | `-VarDecl 0xc89ce73b81d0 <col:3,
/home/andwar02/work/llvm-project/clang/test/CodeGen/AArch64/neon/fullfp16.c:27:20>
/home/andwar02/work/llvm-project/build/cir-release/lib/clang/23/include/arm_fp16.h:501:13
used __s0 'float16_t':'__fp16' cinit
| | `-ImplicitCastExpr 0xc89ce73b8258
</home/andwar02/work/llvm-project/clang/test/CodeGen/AArch64/neon/fullfp16.c:27:20>
'float16_t':'__fp16' <LValueToRValue>
| | `-DeclRefExpr 0xc89ce73b8238 <col:20> 'float16_t':'__fp16'
lvalue ParmVar 0xc89ce7397a30 'a' 'float16_t':'__fp16'
| |-ExprWithCleanups 0xc89ce73b8598
</home/andwar02/work/llvm-project/build/cir-release/lib/clang/23/include/arm_fp16.h:502:3,
col:71> 'float16_t':'__fp16'
| | `-BinaryOperator 0xc89ce73b8578 <col:3, col:71>
'float16_t':'__fp16' '='
| | |-DeclRefExpr 0xc89ce73b8288 <col:3> 'float16_t':'__fp16'
lvalue Var 0xc89ce73b8120 '__ret' 'float16_t':'__fp16'
| | `-BuiltinBitCastExpr 0xc89ce73b8550 <col:11, col:71>
'float16_t':'__fp16' <LValueToRValueBitCast>
| | `-MaterializeTemporaryExpr 0xc89ce73b8538 <col:41, col:70>
'__fp16' xvalue
| | `-CallExpr 0xc89ce73b84d8 <col:41, col:70> '__fp16'
| | |-ImplicitCastExpr 0xc89ce73b84c0 <col:41> '__fp16
(*)(__fp16)' <BuiltinFnToFnPtr>
| | | `-DeclRefExpr 0xc89ce73b8448 <col:41> '<builtin fn
type>' Function 0xc89ce73b82a8 '__builtin_neon_vnegh_f16' '__fp16 (__fp16)'
| | `-ImplicitCastExpr 0xc89ce73b8508 <col:66>
'float16_t':'__fp16' <LValueToRValue>
| | `-DeclRefExpr 0xc89ce73b8468 <col:66>
'float16_t':'__fp16' lvalue Var 0xc89ce73b81d0 '__s0' 'float16_t':'__fp16'
| `-ImplicitCastExpr 0xc89ce73b85d0 <line:503:3> 'float16_t':'__fp16'
<LValueToRValue>
| `-DeclRefExpr 0xc89ce73b85b0 <col:3> 'float16_t':'__fp16' lvalue
Var 0xc89ce73b8120 '__ret' 'float16_t':'__fp16'
```
... and so is the generated CIR output:
```mlir
cir.func no_inline dso_local @test_vnegh_f16(%arg0: !cir.f16 loc(fused[#loc3,
#loc4])) -> !cir.f16 {
%0 = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a", init] {alignment = 2 :
i64} loc(#loc10)
%1 = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["__retval"] {alignment = 2 :
i64} loc(#loc2)
%2 = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["tmp"] {alignment = 2 : i64}
loc(#loc5)
cir.store %arg0, %0 : !cir.f16, !cir.ptr<!cir.f16> loc(#loc6)
cir.scope {
%5 = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["__ret"] {alignment = 2 :
i64} loc(#loc5)
%6 = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["__s0", init] {alignment =
2 : i64} loc(#loc5)
%7 = cir.load align(2) %0 : !cir.ptr<!cir.f16>, !cir.f16 loc(#loc5)
cir.store align(2) %7, %6 : !cir.f16, !cir.ptr<!cir.f16> loc(#loc5)
cir.scope {
%9 = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["ref.tmp0"] {alignment =
2 : i64} loc(#loc5)
%10 = cir.load align(2) %6 : !cir.ptr<!cir.f16>, !cir.f16 loc(#loc5)
%11 = cir.unary(minus, %10) : !cir.f16, !cir.f16 loc(#loc5)
cir.store align(2) %11, %9 : !cir.f16, !cir.ptr<!cir.f16> loc(#loc7)
%12 = cir.load align(2) %9 : !cir.ptr<!cir.f16>, !cir.f16 loc(#loc5)
cir.store align(2) %12, %5 : !cir.f16, !cir.ptr<!cir.f16> loc(#loc5)
} loc(#loc5)
%8 = cir.load align(2) %5 : !cir.ptr<!cir.f16>, !cir.f16 loc(#loc5)
cir.store align(2) %8, %2 : !cir.f16, !cir.ptr<!cir.f16> loc(#loc7)
} loc(#loc5)
%3 = cir.load align(2) %2 : !cir.ptr<!cir.f16>, !cir.f16 loc(#loc5)
cir.store %3, %1 : !cir.f16, !cir.ptr<!cir.f16> loc(#loc11)
%4 = cir.load %1 : !cir.ptr<!cir.f16>, !cir.f16 loc(#loc11)
cir.return %4 : !cir.f16 loc(#loc11)
} loc(#loc9)
} loc(#loc)
```
**NOTE:** The CFG can be eliminated with
`cir-opt --cir-flatten-cfg --cir-canonicalize`, but we don't run the
corresponding patterns/passes when invoking `clang -emit-llvm -fclangir`.
**NOTE:** The macros for NEON builtins are generated by the `NeonEmitter.cpp` TableGen backend:
https://github.com/llvm/llvm-project/blob/54532ffc6bd12aec70e04a96311fc4e3815ea433/clang/utils/TableGen/NeonEmitter.cpp
https://github.com/llvm/llvm-project/pull/180597
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits