https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/100608
>From 17964507593a4ae3d2b13c4fe84500472705485f Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Thu, 25 Jul 2024 18:25:40 +0100 Subject: [PATCH 1/2] [AArch64] Implement FP8 floating-point mode helper intrinsics --- clang/test/CodeGen/aarch64-fpm-helpers.c | 162 +++++++++++++++++++++++ clang/utils/TableGen/NeonEmitter.cpp | 54 ++++++++ 2 files changed, 216 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-fpm-helpers.c diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c new file mode 100644 index 00000000000000..dba79cebae5478 --- /dev/null +++ b/clang/test/CodeGen/aarch64-fpm-helpers.c @@ -0,0 +1,162 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 + +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_NEON_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SVE_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SME_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_NEON_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SVE_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SME_H %s -o - | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#ifdef USE_NEON_H +#include "arm_neon.h" +#endif + +#ifdef USE_SVE_H +#include "arm_sve.h" +#endif + +#ifdef USE_SME_H +#include "arm_sme.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// CHECK-LABEL: define dso_local noundef i64 @test_init( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_init() { return __arm_fpm_init(); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_src1_1() { + return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 1 +// +fpm_t test_src1_2() { + return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_src2_1() { + return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 8 +// +fpm_t test_src2_2() { + return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst1_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_dst1_1() { + return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst2_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 64 +// +fpm_t test_dst2_2() { + return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_of_mul_1() { + return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_INFNAN); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 16384 +// +fpm_t test_of_mul_2() { + return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_SATURATE); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_of_cvt_1() { + return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_INFNAN); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 32768 +// +fpm_t test_of_cvt_2() { + return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_SATURATE); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_lscale( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 8323072 +// +fpm_t test_lscale() { return __arm_set_fpm_lscale(__arm_fpm_init(), 127); } + +// CHECK-LABEL: define dso_local noundef i64 @test_lscale2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 270582939648 +// +fpm_t test_lscale2() { return __arm_set_fpm_lscale2(__arm_fpm_init(), 63); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 2147483648 +// +fpm_t test_nscale_1() { return __arm_set_fpm_nscale(__arm_fpm_init(), -128); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 2130706432 +// +fpm_t test_nscale_2() { return __arm_set_fpm_nscale(__arm_fpm_init(), 127); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 4278190080 +// +fpm_t test_nscale_3() { return __arm_set_fpm_nscale(__arm_fpm_init(), -1); } + +#ifdef __cplusplus +} +#endif diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 8ec8e67388bbd2..58b36a14ef9b8e 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2582,6 +2582,60 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) { OS << "typedef double float64_t;\n"; OS << "#endif\n\n"; + OS << R"( +typedef uint64_t fpm_t; + +enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 }; + +enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE }; + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_fpm_init(void) { + return 0; +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~7ull) | (fpm_t)__format; +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { + return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { + return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) { + return (__fpm & ~0x7f0000ull) | (__scale << 16u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) { + return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) { + return (uint32_t)__fpm | (__scale << 32u); +} + +)"; + emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS); emitNeonTypeDefs("bQb", OS); >From 01548e238ccd0f5a3ed1da318929ca73f9181102 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Tue, 3 Sep 2024 17:41:43 +0100 Subject: [PATCH 2/2] [fixup] Update the test --- clang/test/CodeGen/aarch64-fpm-helpers.c | 63 +++++++++++++----------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c index dba79cebae5478..4bced01d5c71fa 100644 --- a/clang/test/CodeGen/aarch64-fpm-helpers.c +++ b/clang/test/CodeGen/aarch64-fpm-helpers.c @@ -25,6 +25,9 @@ extern "C" { #endif +#define INIT_ZERO 0 +#define INIT_ONES 0xffffffffffffffffU + // CHECK-LABEL: define dso_local noundef i64 @test_init( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -32,94 +35,94 @@ extern "C" { // fpm_t test_init() { return __arm_fpm_init(); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_1( +// CHECK-LABEL: define dso_local noundef i64 @test_src1_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i64 0 +// CHECK-NEXT: ret i64 -8 // fpm_t test_src1_1() { - return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E5M2); + return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_2( +// CHECK-LABEL: define dso_local noundef i64 @test_src1_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 1 // fpm_t test_src1_2() { - return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E4M3); + return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_1( +// CHECK-LABEL: define dso_local noundef i64 @test_src2_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i64 0 +// CHECK-NEXT: ret i64 -57 // fpm_t test_src2_1() { - return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E5M2); + return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_2( +// CHECK-LABEL: define dso_local noundef i64 @test_src2_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 8 // fpm_t test_src2_2() { - return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E4M3); + return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst1_1( +// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i64 0 +// CHECK-NEXT: ret i64 -449 // fpm_t test_dst1_1() { - return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E5M2); + return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst2_2( +// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 64 // fpm_t test_dst2_2() { - return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E4M3); + return __arm_set_fpm_dst_format(INIT_ZERO, __ARM_FPM_E4M3); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_1( +// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i64 0 +// CHECK-NEXT: ret i64 -16385 // fpm_t test_of_mul_1() { - return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_INFNAN); + return __arm_set_fpm_overflow_mul(INIT_ONES, __ARM_FPM_INFNAN); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_2( +// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 16384 // fpm_t test_of_mul_2() { - return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_SATURATE); + return __arm_set_fpm_overflow_mul(INIT_ZERO, __ARM_FPM_SATURATE); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_1( +// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i64 0 +// CHECK-NEXT: ret i64 -32769 // fpm_t test_of_cvt_1() { - return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_INFNAN); + return __arm_set_fpm_overflow_cvt(INIT_ONES, __ARM_FPM_INFNAN); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_2( +// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 32768 // fpm_t test_of_cvt_2() { - return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_SATURATE); + return __arm_set_fpm_overflow_cvt(INIT_ZERO, __ARM_FPM_SATURATE); } // CHECK-LABEL: define dso_local noundef i64 @test_lscale( @@ -127,35 +130,35 @@ fpm_t test_of_cvt_2() { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 8323072 // -fpm_t test_lscale() { return __arm_set_fpm_lscale(__arm_fpm_init(), 127); } +fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); } // CHECK-LABEL: define dso_local noundef i64 @test_lscale2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 270582939648 // -fpm_t test_lscale2() { return __arm_set_fpm_lscale2(__arm_fpm_init(), 63); } +fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); } // CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 2147483648 // -fpm_t test_nscale_1() { return __arm_set_fpm_nscale(__arm_fpm_init(), -128); } +fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); } // CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 2130706432 // -fpm_t test_nscale_2() { return __arm_set_fpm_nscale(__arm_fpm_init(), 127); } +fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); } // CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 4278190080 // -fpm_t test_nscale_3() { return __arm_set_fpm_nscale(__arm_fpm_init(), -1); } +fpm_t test_nscale_3() { return __arm_set_fpm_nscale(INIT_ZERO, -1); } #ifdef __cplusplus } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits