[clang] [NFC][Clang][Headers] Update refs to ACLE in comments (PR #66662)

2023-09-18 Thread M Iyengar via cfe-commits

https://github.com/Blue-Dot created 
https://github.com/llvm/llvm-project/pull/66662

Non-functional change to update the section comments in arm_acle.h, in order to 
align with the updated ACLE documentation:
[https://arm-software.github.io/acle/main/acle.html](https://arm-software.github.io/acle/main/acle.html)

>From b6c54823c95da82fc07e46b2edddbf6484c4201c Mon Sep 17 00:00:00 2001
From: Max Iyengar 
Date: Mon, 18 Sep 2023 13:21:28 +0100
Subject: [PATCH] [NFC][Clang][Headers] Update refs to ACLE in comments

---
 clang/lib/Headers/arm_acle.h | 61 ++--
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 61d80258d166a1d..aed789863f29a2c 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -20,8 +20,8 @@
 extern "C" {
 #endif
 
-/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
-/* 8.3 Memory barriers */
+/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
+/* 7.3 Memory barriers */
 #if !__has_builtin(__dmb)
 #define __dmb(i) __builtin_arm_dmb(i)
 #endif
@@ -32,7 +32,7 @@ extern "C" {
 #define __isb(i) __builtin_arm_isb(i)
 #endif
 
-/* 8.4 Hints */
+/* 7.4 Hints */
 
 #if !__has_builtin(__wfi)
 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 
__wfi(void) {
@@ -68,7 +68,7 @@ static __inline__ void __attribute__((__always_inline__, 
__nodebug__)) __yield(v
 #define __dbg(t) __builtin_arm_dbg(t)
 #endif
 
-/* 8.5 Swap */
+/* 7.5 Swap */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __swp(uint32_t __x, volatile uint32_t *__p) {
   uint32_t v;
@@ -78,8 +78,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
   return v;
 }
 
-/* 8.6 Memory prefetch intrinsics */
-/* 8.6.1 Data prefetch */
+/* 7.6 Memory prefetch intrinsics */
+/* 7.6.1 Data prefetch */
 #define __pld(addr) __pldx(0, 0, 0, addr)
 
 #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@@ -90,7 +90,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
   __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
 #endif
 
-/* 8.6.2 Instruction prefetch */
+/* 7.6.2 Instruction prefetch */
 #define __pli(addr) __plix(0, 0, addr)
 
 #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@@ -101,15 +101,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
   __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
 #endif
 
-/* 8.7 NOP */
+/* 7.7 NOP */
 #if !defined(_MSC_VER) || !defined(__aarch64__)
 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 
__nop(void) {
   __builtin_arm_nop();
 }
 #endif
 
-/* 9 DATA-PROCESSING INTRINSICS */
-/* 9.2 Miscellaneous data-processing intrinsics */
+/* 8 DATA-PROCESSING INTRINSICS */
+/* 8.2 Miscellaneous data-processing intrinsics */
 /* ROR */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __ror(uint32_t __x, uint32_t __y) {
@@ -248,9 +248,7 @@ __rbitl(unsigned long __t) {
 #endif
 }
 
-/*
- * 9.3 16-bit multiplications
- */
+/* 8.3 16-bit multiplications */
 #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
 static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
 __smulbb(int32_t __a, int32_t __b) {
@@ -279,18 +277,18 @@ __smulwt(int32_t __a, int32_t __b) {
 #endif
 
 /*
- * 9.4 Saturating intrinsics
+ * 8.4 Saturating intrinsics
  *
  * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
  * intrinsics are implemented and the flag is enabled.
  */
-/* 9.4.1 Width-specified saturation intrinsics */
+/* 8.4.1 Width-specified saturation intrinsics */
 #if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
 #define __ssat(x, y) __builtin_arm_ssat(x, y)
 #define __usat(x, y) __builtin_arm_usat(x, y)
 #endif
 
-/* 9.4.2 Saturating addition and subtraction intrinsics */
+/* 8.4.2 Saturating addition and subtraction intrinsics */
 #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __qadd(int32_t __t, int32_t __v) {
@@ -308,7 +306,7 @@ __qdbl(int32_t __t) {
 }
 #endif
 
-/* 9.4.3 Accumultating multiplications */
+/* 8.4.3 Accumultating multiplications */
 #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __smlabb(int32_t __a, int32_t __b, int32_t __c) {
@@ -337,13 +335,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) {
 #endif
 
 
-/* 9.5.4 Parallel 16-bit saturation */
+/* 8.5.4 Parallel 16-bit saturation */
 #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
 #define __ssat16(x, y) __builtin_arm_ssat16(x, y)
 #define __usat16(x, y) __builtin_arm_usat16(x, y)
 #endif
 
-/* 9.5.5 Packing and unpacking */
+/* 8.5.5 Packing and unpacking */
 #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
 typedef int32_t int8x4_t;
 typedef int32_t int16x2_t;
@@ -368,7 +366,7 @@ __uxtb16(int8x4_t __a) {
 }
 #endif
 
-/* 9.5.6 Parallel selection */
+/* 8.5.6 Parallel sele

[clang] Implement ACLE rintn and rintnf intrinsics (PR #66112)

2023-09-12 Thread M Iyengar via cfe-commits

https://github.com/Blue-Dot review_requested 
https://github.com/llvm/llvm-project/pull/66112
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Implement ACLE rintn and rintnf intrinsics (PR #66112)

2023-09-12 Thread M Iyengar via cfe-commits

https://github.com/Blue-Dot created 
https://github.com/llvm/llvm-project/pull/66112:

This patch adds support for two missing ACLE intrinsics for floating-point 
rounding with ties to even:

- rintn
- rintnf

These are specified in ACLE section 8.6: 
[https://arm-software.github.io/acle/main/acle.html#floating-point-data-processing-intrinsics]

Change-Id: I951971ad5a3fd3822efdf8cbae22918c31eef28a

>From c36fd81936845d1bc2f69dbb86372c554e1e9dba Mon Sep 17 00:00:00 2001
From: Max Iyengar 
Date: Tue, 12 Sep 2023 15:20:57 +0100
Subject: [PATCH] Implement ACLE rintn and rintnf intrinsics

Change-Id: I951971ad5a3fd3822efdf8cbae22918c31eef28a
---
 clang/include/clang/Basic/BuiltinsAArch64.def |  4 
 clang/lib/CodeGen/CGBuiltin.cpp   |  7 +++
 clang/lib/Headers/arm_acle.h  | 15 +++
 clang/test/CodeGen/arm_acle.c | 16 
 4 files changed, 42 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index eaae6c9ad846868..017138bd34f8bd5 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -110,6 +110,10 @@ BUILTIN(__builtin_arm_tcommit, "v", "n")
 BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
 BUILTIN(__builtin_arm_ttest, "WUi", "nc")
 
+// Floating point round to integral intrinsics
+BUILTIN(__builtin_arm_rintn, "dd", "g")
+BUILTIN(__builtin_arm_rintnf, "ff", "g")
+
 // Armv8.5-A FP rounding intrinsics
 TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
 TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 1ee0c469af9ee8b..b594923e11e145a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10224,6 +10224,13 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   "cls");
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_rintn ||
+  BuiltinID == clang::AArch64::BI__builtin_arm_rintnf) {
+llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+return Builder.CreateCall(
+CGM.getIntrinsic(Intrinsic::roundeven, Arg->getType()), Arg, "rintn");
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
   BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index c208512bab59829..cb1fb596a41fc93 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -592,6 +592,21 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
 }
 #endif
 
+/* 8.6 Floating-point data-processing intrinsics */
+#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING)&& \
+  (__ARM_FEATURE_DIRECTED_ROUNDING)) && \
+  (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
+static __inline__ double __attribute__((__always_inline__, __nodebug__))
+__rintn(double __a) {
+  return __builtin_arm_rintn(__a);
+}
+
+static __inline__ float __attribute__((__always_inline__, __nodebug__))
+__rintnf(float __a) {
+  return __builtin_arm_rintnf(__a);
+}
+#endif
+
 /* 9.7 CRC32 intrinsics */
 #if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) ||   
\
 (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index b1105a1d5aabb64..00afaf15fded392 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1690,6 +1690,22 @@ int32_t test_jcvt(double v) {
 }
 #endif
 
+#if defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_rintn(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call double @llvm.roundeven.f64(double [[TMP0:%.*]])
+double test_rintn(double a) {
+  return __rintn(a);
+}
+
+// AArch64-LABEL: @test_rintnf(
+// AArch64-NEXT: entry:
+// AArch64-NEXT:  call float @llvm.roundeven.f32(float [[TMP0:%.*]])
+float test_rintnf(float b) {
+  return __rintnf(b);
+}
+#endif
 
 #if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Implement ACLE rintn and rintnf intrinsics (PR #66112)

2023-09-12 Thread M Iyengar via cfe-commits

https://github.com/Blue-Dot review_requested 
https://github.com/llvm/llvm-project/pull/66112
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Implement ACLE rintn and rintnf intrinsics (PR #66112)

2023-09-12 Thread M Iyengar via cfe-commits

https://github.com/Blue-Dot review_requested 
https://github.com/llvm/llvm-project/pull/66112
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Implement ACLE rintn and rintnf intrinsics (PR #66112)

2023-09-13 Thread M Iyengar via cfe-commits

https://github.com/Blue-Dot updated 
https://github.com/llvm/llvm-project/pull/66112:

>From 6fd8cfd2c119dfd99702913ac9201a39e091f3e8 Mon Sep 17 00:00:00 2001
From: Blue-Dot 
Date: Wed, 13 Sep 2023 17:51:45 +0100
Subject: [PATCH 1/2] [AArch64][Clang] Implement ACLE rintn intrinsics

Change-Id: I7f05a59f07f05ff8ac84cbab59a7803f1fe1642f
---
 clang/include/clang/Basic/BuiltinsAArch64.def |  4 
 clang/lib/CodeGen/CGBuiltin.cpp   |  7 +++
 clang/lib/Headers/arm_acle.h  | 15 +++
 clang/test/CodeGen/arm_acle.c | 16 
 4 files changed, 42 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index eaae6c9ad846868..017138bd34f8bd5 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -110,6 +110,10 @@ BUILTIN(__builtin_arm_tcommit, "v", "n")
 BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
 BUILTIN(__builtin_arm_ttest, "WUi", "nc")
 
+// Floating point round to integral intrinsics
+BUILTIN(__builtin_arm_rintn, "dd", "g")
+BUILTIN(__builtin_arm_rintnf, "ff", "g")
+
 // Armv8.5-A FP rounding intrinsics
 TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
 TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 037a2f9f7b15322..8ca6f3fb26faeac 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10219,6 +10219,13 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   "cls");
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_rintn ||
+  BuiltinID == clang::AArch64::BI__builtin_arm_rintnf) {
+llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+return Builder.CreateCall(
+CGM.getIntrinsic(Intrinsic::roundeven, Arg->getType()), Arg, "rintn");
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
   BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index c208512bab59829..cb1fb596a41fc93 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -592,6 +592,21 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
 }
 #endif
 
+/* 8.6 Floating-point data-processing intrinsics */
+#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING)&& \
+  (__ARM_FEATURE_DIRECTED_ROUNDING)) && \
+  (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
+static __inline__ double __attribute__((__always_inline__, __nodebug__))
+__rintn(double __a) {
+  return __builtin_arm_rintn(__a);
+}
+
+static __inline__ float __attribute__((__always_inline__, __nodebug__))
+__rintnf(float __a) {
+  return __builtin_arm_rintnf(__a);
+}
+#endif
+
 /* 9.7 CRC32 intrinsics */
 #if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) ||   
\
 (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index b1105a1d5aabb64..00afaf15fded392 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1690,6 +1690,22 @@ int32_t test_jcvt(double v) {
 }
 #endif
 
+#if defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_rintn(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call double @llvm.roundeven.f64(double [[TMP0:%.*]])
+double test_rintn(double a) {
+  return __rintn(a);
+}
+
+// AArch64-LABEL: @test_rintnf(
+// AArch64-NEXT: entry:
+// AArch64-NEXT:  call float @llvm.roundeven.f32(float [[TMP0:%.*]])
+float test_rintnf(float b) {
+  return __rintnf(b);
+}
+#endif
 
 #if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)
 

>From aa7084f1b792571c848c5b337c5a1e0ab00dbb6e Mon Sep 17 00:00:00 2001
From: Blue-Dot 
Date: Wed, 13 Sep 2023 17:56:43 +0100
Subject: [PATCH 2/2] [AArch64][Clang] Fixed to use __builtin_roundeven

Change-Id: Ibfe830cf69e3ea10f68f651e5a5d507769792496
---
 clang/include/clang/Basic/BuiltinsAArch64.def | 4 
 clang/lib/CodeGen/CGBuiltin.cpp   | 7 ---
 clang/lib/Headers/arm_acle.h  | 4 ++--
 3 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index 017138bd34f8bd5..eaae6c9ad846868 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -110,10 +110,6 @@ BUILTIN(__builtin_arm_tcommit, "v", "n")
 BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
 BUILTIN(__builtin_arm_ttest, "WUi", "nc")
 
-// Floating point round to integral intrinsics
-BUILTIN(__builtin_arm_rintn, "dd", "g")
-BUILTIN(__builtin_arm_rintnf, "ff", "g")
-
 // Armv8.5-A FP rounding intrinsics
 TARGET_BUILTIN(__builtin