[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-08-23 Thread Max Iyengar via Phabricator via cfe-commits
miyengar created this revision.
miyengar added a reviewer: vhscampos.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
miyengar requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

This patch adds 8 missing intrinsics as specified in the Arm ACLE document,
section 2.12.1.1:
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#rounding-3


The intrinsics implemented are:

- vrnd32z_f64
- vrnd32zq_f64
- vrnd64z_f64
- vrnd64zq_f64
- vrnd32x_f64
- vrnd32xq_f64
- vrnd64x_f64
- vrnd64xq_f64


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D158626

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
===
--- llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
+++ llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -81,3 +81,85 @@
   %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
   ret <4 x float> %val
 }
+
+declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32x_f64:
+; CHECK: frint32x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32xq_f64:
+; CHECK: frint32x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32z_f64:
+; CHECK: frint32z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32zq_f64:
+; CHECK: frint32z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64x_f64:
+; CHECK: frint64x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64xq_f64:
+; CHECK: frint64x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64z_f64:
+; CHECK: frint64z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64zq_f64:
+; CHECK: frint64z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6282,24 +6282,30 @@
 : SIMDTwoVectorFP;
 
 // Supports only S and D element sizes
-let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDTwoVectorSD opc, string asm,
+multiclass SIMDTwoVectorSD {
-
-  def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64,
+  let mayRaiseFPException = 1, Uses = [FPCR] in {
+def v2f32 : BaseSIMDTwoSameVector<0, U, 00, {0b, opc}, 0b00, V64,
 asm, ".2s", ".2s",
   [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
-  def v4f32 : BaseSIMDTwoSameVector<1, U, 00,

[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-09-07 Thread Max Iyengar via Phabricator via cfe-commits
miyengar updated this revision to Diff 556170.
miyengar added a comment.

Moved pattern from `AArch64InstrFormats.td` to `AArch64InstrInfo.td`


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158626/new/

https://reviews.llvm.org/D158626

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
===
--- llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
+++ llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -81,3 +81,85 @@
   %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
   ret <4 x float> %val
 }
+
+declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32x_f64:
+; CHECK: frint32x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32xq_f64:
+; CHECK: frint32x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32z_f64:
+; CHECK: frint32z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32zq_f64:
+; CHECK: frint32z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64x_f64:
+; CHECK: frint64x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64xq_f64:
+; CHECK: frint64x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64z_f64:
+; CHECK: frint64z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64zq_f64:
+; CHECK: frint64z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4446,6 +4446,16 @@
   defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
 } // HasFRInt3264
 
+// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
+def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
+  (FRINT32ZDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
+  (FRINT64ZDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
+  (FRINT32XDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
+  (FRINT64XDr FPR64:$Rn)>;
+
 // Emitting strict_lrint as two instructions is valid as any exceptions that
 // occur will happen in exactly one of the instructions (e.g. if the input is
 // not an integer the inexact exception will happen in the FRINTX but not then
Index: clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
===
--- clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
+++ clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
@@ -62,3

[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-09-07 Thread Max Iyengar via Phabricator via cfe-commits
miyengar added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:6309
+
+  def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn))),
+   (!cast<Instruction>(NAME # Dr) FPR64:$Rn)>;

dmgreen wrote:
> miyengar wrote:
> > dmgreen wrote:
> > > I think the instructions in this multiclass are the vector variants. Can 
> > > the pattern be moved to the FRIntNNT/SingleOperandFPNo16 class?
> > Thanks for the comment. I have tried to do this, but have run into a 
> > problem:
> > 
> > This pattern targets the LLVM internal intrinsic: 
> > `int_aarch64_neon_frint***_v1f64`, as referenced in `AArch64InstrInfo.td`: 
> > 
> > ```
> > defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
> > ```
> > It pattern-matches 'v1f64' to 'Dr', which corresponds to a scalar
> > instruction defined in FRIntNNT/SingleOperandFPNo16 - i.e. it is matching
> > the vector variant to the scalar variant.
> > 
> > Moving the pattern to FRIntNNT results in an error "Cannot select: 
> > intrinsic %llvm.aarch64.neon.frint64x" as FRIntNNT is designed to match the 
> > intrinsics whose name does not contain "neon". Since the current patch 
> > deals specifically with the neon variant, I don't see an uncomplicated way 
> > to modify FRIntNNT to accommodate both variants of such intrinsic (neon and 
> > vanilla).
> > 
> > I have moved the pattern from SIMDTwoVectorSD to FRIntNNTVector though 
> > which is hopefully a better place to put it.
> > 
> > Does this seem like a sensible solution? Thanks!
> I see, Because there is a difference between int_aarch64_frint32z and 
> int_aarch64_neon_frint32z.
> 
> Could you pass int_aarch64_neon_frint32z to FRIntNNT too, or make them free 
> patterns in AArch64InstrInfo.td? Otherwise this is using the class to 
> generate vector variants of FRINT32Z to also generate the scalar FRINT32ZDr 
> instruction too. It likely doesn't matter a huge amount, but it feels like it 
> breaks the encapsulation of the FRIntNNTVector class.
Ah okay, this makes sense, thank you!

I have moved the patterns from FRIntNNTVector to make them free patterns in 
AArch64InstrInfo.td instead - this should preserve the encapsulation of the 
FRIntNNTVector class. 


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158626/new/

https://reviews.llvm.org/D158626

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-08-24 Thread Max Iyengar via Phabricator via cfe-commits
miyengar updated this revision to Diff 553104.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158626/new/

https://reviews.llvm.org/D158626

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
===
--- llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
+++ llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -81,3 +81,85 @@
   %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
   ret <4 x float> %val
 }
+
+declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32x_f64:
+; CHECK: frint32x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32xq_f64:
+; CHECK: frint32x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32z_f64:
+; CHECK: frint32z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32zq_f64:
+; CHECK: frint32z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64x_f64:
+; CHECK: frint64x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64xq_f64:
+; CHECK: frint64x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64z_f64:
+; CHECK: frint64z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64zq_f64:
+; CHECK: frint64z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6292,24 +6292,27 @@
 : SIMDTwoVectorFP;
 
 // Supports only S and D element sizes
-let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDTwoVectorSD opc, string asm,
+multiclass SIMDTwoVectorSD {
-
-  def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64,
+  let mayRaiseFPException = 1, Uses = [FPCR] in {
+def v2f32 : BaseSIMDTwoSameVector<0, U, 00, {0b, opc}, 0b00, V64,
 asm, ".2s", ".2s",
   [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
-  def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128,
+def v4f32 : BaseSIMDTwoSameVector<1, U, 00, {0b, opc}, 0b00, V128,
 asm, ".4s", ".4s",
   [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
-  def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128,
+def v2f64 : BaseSIMDTwoSameVector<1, U, 01, {0b, opc}, 0b00, V128,
 asm, ".2d", ".2d",
   [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+  }
+
+  def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn))),
+   (!cas

[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-08-24 Thread Max Iyengar via Phabricator via cfe-commits
miyengar marked an inline comment as done.
miyengar added a comment.

Thank you for the feedback! I've added an amended patch using the pre-existing 
instruction. Also, I've tried to submit the patch with context this time.




Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:6297
   [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+def f64 : BaseSingleOperandFPData<{0b0100, U, opc},
+FPR64, f64, asm, null_frag>;

dmgreen wrote:
> This looks like it is defining a new instruction. Does that already exist 
> somewhere? Probably from somewhere like FRIntNNT.
Ah thanks! I've amended this to use the already existing instruction 
(FRINT32ZDr) as defined in FRIntNNT


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158626/new/

https://reviews.llvm.org/D158626

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-09-05 Thread Max Iyengar via Phabricator via cfe-commits
miyengar updated this revision to Diff 555879.
miyengar marked an inline comment as done.
miyengar added a comment.

Moved v1f64 -> Dr (Vector to Scalar) Pattern from `SIMDTwoVectorSD` to 
`FRIntNNTVector`


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158626/new/

https://reviews.llvm.org/D158626

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
===
--- llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
+++ llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -81,3 +81,85 @@
   %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
   ret <4 x float> %val
 }
+
+declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32x_f64:
+; CHECK: frint32x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32xq_f64:
+; CHECK: frint32x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32z_f64:
+; CHECK: frint32z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32zq_f64:
+; CHECK: frint32z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64x_f64:
+; CHECK: frint64x d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64xq_f64:
+; CHECK: frint64x v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64z_f64:
+; CHECK: frint64z d0, d0
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64zq_f64:
+; CHECK: frint64z v0.2d, v0.2d
+; CHECK-NEXT:ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6309,7 +6309,10 @@
 
 multiclass FRIntNNTVector :
-   SIMDTwoVectorSD;
+   SIMDTwoVectorSD {
+  def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn))),
+  (!cast<Instruction>(NAME # Dr) FPR64:$Rn)>;
+}
 
 // Supports only S element size.
 multiclass SIMDTwoVectorS opc, string asm,
Index: clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
===
--- clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
+++ clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
@@ -62,3 +62,59 @@
 float32x4_t test_vrnd64zq_f32(float32x4_t a) {
   return vrnd64zq_f32(a);
 }
+
+// CHECK-LABEL: test_vrnd32x_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd32x_f64(float64x1_t a) {
+  return vrnd32x_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32xq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+// CHE

[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

2023-09-05 Thread Max Iyengar via Phabricator via cfe-commits
miyengar added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:6309
+
+  def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn))),
+   (!cast<Instruction>(NAME # Dr) FPR64:$Rn)>;

dmgreen wrote:
> I think the instructions in this multiclass are the vector variants. Can the 
> pattern be moved to the FRIntNNT/SingleOperandFPNo16 class?
Thanks for the comment. I have tried to do this, but have run into a problem:

This pattern targets the LLVM internal intrinsic: 
`int_aarch64_neon_frint***_v1f64`, as referenced in `AArch64InstrInfo.td`: 

```
defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
```
It pattern-matches 'v1f64' to 'Dr', which corresponds to a scalar instruction
defined in FRIntNNT/SingleOperandFPNo16 - i.e. it is matching the vector
variant to the scalar variant.

Moving the pattern to FRIntNNT results in an error "Cannot select: intrinsic 
%llvm.aarch64.neon.frint64x" as FRIntNNT is designed to match the intrinsics 
whose name does not contain "neon". Since the current patch deals specifically 
with the neon variant, I don't see an uncomplicated way to modify FRIntNNT to 
accommodate both variants of such intrinsic (neon and vanilla).

I have moved the pattern from SIMDTwoVectorSD to FRIntNNTVector though which is 
hopefully a better place to put it.

Does this seem like a sensible solution? Thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158626/new/

https://reviews.llvm.org/D158626

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits