[PATCH] D102397: [AArch64] Lower bitreverse in ISel

2021-05-13 Thread Irina Dobrescu via Phabricator via cfe-commits
Rin created this revision.
Herald added subscribers: dexonsmith, danielkiss, hiraditya, kristof.beyls.
Rin requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D102397

Files:
  clang/test/CodeGen/aarch64-neon-misc.c
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
  llvm/test/CodeGen/AArch64/bitreverse.ll

Index: llvm/test/CodeGen/AArch64/bitreverse.ll
===
--- llvm/test/CodeGen/AArch64/bitreverse.ll
+++ llvm/test/CodeGen/AArch64/bitreverse.ll
@@ -31,30 +31,8 @@
 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone
 
 define <8 x i8> @g_vec(<8 x i8> %a) {
-; CHECK-DAG: movi [[M1:v.*]], #15
-; CHECK-DAG: movi [[M2:v.*]], #240
-; CHECK: and  [[A1:v.*]], v0.8b, [[M1]]
-; CHECK: and  [[A2:v.*]], v0.8b, [[M2]]
-; CHECK-DAG: shl  [[L4:v.*]], [[A1]], #4
-; CHECK-DAG: ushr [[R4:v.*]], [[A2]], #4
-; CHECK-DAG: orr  [[V4:v.*]], [[R4]], [[L4]]
-
-; CHECK-DAG: movi [[M3:v.*]], #51
-; CHECK-DAG: movi [[M4:v.*]], #204
-; CHECK: and  [[A3:v.*]], [[V4]], [[M3]]
-; CHECK: and  [[A4:v.*]], [[V4]], [[M4]]
-; CHECK-DAG: shl  [[L2:v.*]], [[A3]], #2
-; CHECK-DAG: ushr [[R2:v.*]], [[A4]], #2
-; CHECK-DAG: orr  [[V2:v.*]], [[R2]], [[L2]]
-
-; CHECK-DAG: movi [[M5:v.*]], #85
-; CHECK-DAG: movi [[M6:v.*]], #170
-; CHECK: and  [[A5:v.*]], [[V2]], [[M5]]
-; CHECK: and  [[A6:v.*]], [[V2]], [[M6]]
-; CHECK-DAG: shl  [[L1:v.*]], [[A5]], #1
-; CHECK-DAG: ushr [[R1:v.*]], [[A6]], #1
-; CHECK: orr  [[V1:v.*]], [[R1]], [[L1]]
-
+;CHECK-LABEL: g_vec:
+;rbit	v0.8b, v0.8b
 ; CHECK: ret
   %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
   ret <8 x i8> %b
Index: llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
===
--- llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
+++ llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
@@ -4,7 +4,7 @@
 ;CHECK-LABEL: rbit_8b:
 ;CHECK: rbit.8b
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
-	%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
+	%tmp3 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp3
 }
 
@@ -12,12 +12,12 @@
 ;CHECK-LABEL: rbit_16b:
 ;CHECK: rbit.16b
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
-	%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
+	%tmp3 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp3
 }
 
-declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) nounwind readnone
 
 define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sxtl8h:
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4131,7 +4131,8 @@
 def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
 def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
 
-defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
+//defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
+defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
 defm REV16  : SIMDTwoVectorB<0, 0b00, 0b1, "rev16", AArch64rev16>;
 defm REV32  : SIMDTwoVectorBH<1, 0b0, "rev32", AArch64rev32>;
 defm REV64  : SIMDTwoVectorBHS<0, 0b0, "rev64", AArch64rev64>;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -378,6 +378,8 @@
   setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+  setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
+  setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
Index: llvm/lib/IR/AutoUpgrade.cpp
===
--- llvm/lib/IR/AutoUpgrade.cpp
+++ llvm/lib/IR/AutoUpgrade.cpp
@@ -553,6 +553,11 @@
 F->arg_begin()->getType());
   return true;
 }
+if (Name.startswith("aarch64.neon.rbit")) {
+  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
+F->arg_begin()->getType());
+  retur

[PATCH] D102397: [AArch64] Lower bitreverse in ISel

2021-05-14 Thread Irina Dobrescu via Phabricator via cfe-commits
Rin updated this revision to Diff 345411.
Rin added a comment.

Remove unnecessary comment and add more bitreverse tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102397/new/

https://reviews.llvm.org/D102397

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-misc.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
  llvm/test/CodeGen/AArch64/bitreverse.ll

Index: llvm/test/CodeGen/AArch64/bitreverse.ll
===
--- llvm/test/CodeGen/AArch64/bitreverse.ll
+++ llvm/test/CodeGen/AArch64/bitreverse.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s
 
 ; These tests just check that the plumbing is in place for @llvm.bitreverse.
@@ -6,13 +7,16 @@
 
 define <2 x i16> @f(<2 x i16> %a) {
 ; CHECK-LABEL: f:
-; CHECK: fmov [[REG1:w[0-9]+]], s0
-; CHECK-DAG: rbit [[REG2:w[0-9]+]], [[REG1]]
-; CHECK-DAG: fmov s0, [[REG2]]
-; CHECK-DAG: mov [[REG3:w[0-9]+]], v0.s[1]
-; CHECK-DAG: rbit [[REG4:w[0-9]+]], [[REG3]]
-; CHECK-DAG: mov v0.s[1], [[REG4]]
-; CHECK-DAG: ushr v0.2s, v0.2s, #16
+; CHECK:   // %bb.0:
+; CHECK-NEXT:// kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:fmov w8, s0
+; CHECK-NEXT:rbit w8, w8
+; CHECK-NEXT:mov w9, v0.s[1]
+; CHECK-NEXT:fmov s0, w8
+; CHECK-NEXT:rbit w8, w9
+; CHECK-NEXT:mov v0.s[1], w8
+; CHECK-NEXT:ushr v0.2s, v0.2s, #16
+; CHECK-NEXT:ret
   %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
   ret <2 x i16> %b
 }
@@ -21,41 +25,161 @@
 
 define i8 @g(i8 %a) {
 ; CHECK-LABEL: g:
-; CHECK: rbit [[REG:w[0-9]+]], w0
-; CHECK-NEXT: lsr w0, [[REG]], #24
-; CHECK-NEXT: ret
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit w8, w0
+; CHECK-NEXT:lsr w0, w8, #24
+; CHECK-NEXT:ret
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
 }
 
+declare i16 @llvm.bitreverse.i16(i16) readnone
+
+define i16 @g_16(i16 %a) {
+; CHECK-LABEL: g_16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit w8, w0
+; CHECK-NEXT:lsr w0, w8, #16
+; CHECK-NEXT:ret
+  %b = call i16 @llvm.bitreverse.i16(i16 %a)
+  ret i16 %b
+}
+
+declare i32 @llvm.bitreverse.i32(i32) readnone
+
+define i32 @g_32(i32 %a) {
+; CHECK-LABEL: g_32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit w0, w0
+; CHECK-NEXT:ret
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %b
+}
+
+declare i64 @llvm.bitreverse.i64(i64) readnone
+
+define i64 @g_64(i64 %a) {
+; CHECK-LABEL: g_64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit x0, x0
+; CHECK-NEXT:ret
+  %b = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %b
+}
+
 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone
 
 define <8 x i8> @g_vec(<8 x i8> %a) {
-; CHECK-DAG: movi [[M1:v.*]], #15
-; CHECK-DAG: movi [[M2:v.*]], #240
-; CHECK: and  [[A1:v.*]], v0.8b, [[M1]]
-; CHECK: and  [[A2:v.*]], v0.8b, [[M2]]
-; CHECK-DAG: shl  [[L4:v.*]], [[A1]], #4
-; CHECK-DAG: ushr [[R4:v.*]], [[A2]], #4
-; CHECK-DAG: orr  [[V4:v.*]], [[R4]], [[L4]]
-
-; CHECK-DAG: movi [[M3:v.*]], #51
-; CHECK-DAG: movi [[M4:v.*]], #204
-; CHECK: and  [[A3:v.*]], [[V4]], [[M3]]
-; CHECK: and  [[A4:v.*]], [[V4]], [[M4]]
-; CHECK-DAG: shl  [[L2:v.*]], [[A3]], #2
-; CHECK-DAG: ushr [[R2:v.*]], [[A4]], #2
-; CHECK-DAG: orr  [[V2:v.*]], [[R2]], [[L2]]
-
-; CHECK-DAG: movi [[M5:v.*]], #85
-; CHECK-DAG: movi [[M6:v.*]], #170
-; CHECK: and  [[A5:v.*]], [[V2]], [[M5]]
-; CHECK: and  [[A6:v.*]], [[V2]], [[M6]]
-; CHECK-DAG: shl  [[L1:v.*]], [[A5]], #1
-; CHECK-DAG: ushr [[R1:v.*]], [[A6]], #1
-; CHECK: orr  [[V1:v.*]], [[R1]], [[L1]]
-
-; CHECK: ret
+; CHECK-LABEL: g_vec:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit v0.8b, v0.8b
+; CHECK-NEXT:ret
   %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
   ret <8 x i8> %b
 }
+
+declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) readnone
+
+define <16 x i8> @g_vec_16x8(<16 x i8> %a) {
+; CHECK-LABEL: g_vec_16x8:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit v0.16b, v0.16b
+; CHECK-NEXT:ret
+  %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
+  ret <16 x i8> %b
+}
+
+declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) readnone
+
+define <4 x i16> @g_vec_4x16(<4 x i16> %a) {
+; CHECK-LABEL: g_vec_4x16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev16 v0.8b, v0.8b
+; CHECK-NEXT:rbit v0.8b, v0.8b
+; CHECK-NEXT:ret
+  %b = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
+  ret <4 x i16> %b
+}
+
+declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) readnone
+
+define <8 x i16> @g_vec_8x16(<8 x i16> %a) {
+; CHECK-LABEL: g_vec_8x16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev16 v0.16b, v0.16b
+; CHECK-NEXT:rbit v0.16b, v0.16b
+; CHECK-NEXT:r

[PATCH] D102397: [AArch64] Lower bitreverse in ISel

2021-05-14 Thread Irina Dobrescu via Phabricator via cfe-commits
Rin updated this revision to Diff 345465.
Rin added a comment.

Add AutoUpgrade test and move bitreverse lowering


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102397/new/

https://reviews.llvm.org/D102397

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-misc.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
  llvm/test/CodeGen/AArch64/bitreverse.ll
  llvm/test/CodeGen/AArch64/neon_rbit.ll

Index: llvm/test/CodeGen/AArch64/neon_rbit.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon_rbit.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
+
+; The llvm.aarch64_neon_rbit intrinsic should be auto-upgraded to the
+; target-independent bitreverse intrinsic.
+
+declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
+
+define <8 x i8> @rbit_8x8(<8 x i8> %A) nounwind {
+; CHECK-LABEL: rbit_8x8:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit v0.8b, v0.8b
+; CHECK-NEXT:ret
+%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %A)
+	ret <8 x i8> %tmp3
+}
+
+declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
+
+define <16 x i8> @rbit_16x8(<16 x i8> %A) nounwind {
+; CHECK-LABEL: rbit_16x8:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit v0.16b, v0.16b
+; CHECK-NEXT:ret
+%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %A)
+	ret <16 x i8> %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.rbit.v4i16(<4 x i16>) nounwind readnone
+
+define <4 x i16> @rbit_4x16(<4 x i16> %A) nounwind {
+; CHECK-LABEL: rbit_4x16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev16 v0.8b, v0.8b
+; CHECK-NEXT:rbit v0.8b, v0.8b
+; CHECK-NEXT:ret
+%tmp3 = call <4 x i16> @llvm.aarch64.neon.rbit.v4i16(<4 x i16> %A)
+	ret <4 x i16> %tmp3
+}
+
+declare <8 x i16> @llvm.aarch64.neon.rbit.v8i16(<8 x i16>) nounwind readnone
+
+define <8 x i16> @rbit_8x16(<8 x i16> %A) {
+; CHECK-LABEL: rbit_8x16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev16 v0.16b, v0.16b
+; CHECK-NEXT:rbit v0.16b, v0.16b
+; CHECK-NEXT:ret
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.rbit.v8i16(<8 x i16> %A)
+  ret <8 x i16> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32>) nounwind readnone
+
+define <2 x i32> @rbit_2x32(<2 x i32> %A) {
+; CHECK-LABEL: rbit_2x32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:// kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:fmov w8, s0
+; CHECK-NEXT:rbit w8, w8
+; CHECK-NEXT:mov w9, v0.s[1]
+; CHECK-NEXT:fmov s0, w8
+; CHECK-NEXT:rbit w8, w9
+; CHECK-NEXT:mov v0.s[1], w8
+; CHECK-NEXT:// kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:ret
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32> %A)
+  ret <2 x i32> %tmp3
+}
+
+declare <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32>) nounwind readnone
+
+define <4 x i32> @rbit_4x32(<4 x i32> %A) {
+; CHECK-LABEL: rbit_4x32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:fmov w10, s0
+; CHECK-NEXT:mov w8, v0.s[1]
+; CHECK-NEXT:rbit w10, w10
+; CHECK-NEXT:mov w9, v0.s[2]
+; CHECK-NEXT:mov w11, v0.s[3]
+; CHECK-NEXT:fmov s0, w10
+; CHECK-NEXT:rbit w8, w8
+; CHECK-NEXT:rbit w9, w9
+; CHECK-NEXT:mov v0.s[1], w8
+; CHECK-NEXT:mov v0.s[2], w9
+; CHECK-NEXT:rbit w8, w11
+; CHECK-NEXT:mov v0.s[3], w8
+; CHECK-NEXT:ret
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32> %A)
+  ret <4 x i32> %tmp3
+}
+
+declare <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64>) readnone
+
+define <1 x i64> @rbit_1x64(<1 x i64> %A) {
+; CHECK-LABEL: rbit_1x64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:// kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:fmov x8, d0
+; CHECK-NEXT:rbit x8, x8
+; CHECK-NEXT:fmov d0, x8
+; CHECK-NEXT:ret
+  %tmp3 = call <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64> %A)
+  ret <1 x i64> %tmp3
+}
+
+declare <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64>) readnone
+
+define <2 x i64> @rbit_2x64(<2 x i64> %A) {
+; CHECK-LABEL: rbit_2x64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:fmov x8, d0
+; CHECK-NEXT:rbit x8, x8
+; CHECK-NEXT:mov x9, v0.d[1]
+; CHECK-NEXT:fmov d0, x8
+; CHECK-NEXT:rbit x8, x9
+; CHECK-NEXT:mov v0.d[1], x8
+; CHECK-NEXT:ret
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64> %A)
+  ret <2 x i64> %tmp3
+}
Index: llvm/test/CodeGen/AArch64/bitreverse.ll
===
--- llvm/test/CodeGen/AArch64/bitreverse.ll
+++ llvm/test/CodeGen/AArch64/bitreverse.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/

[PATCH] D102397: [AArch64] Lower bitreverse in ISel

2021-05-17 Thread Irina Dobrescu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG50511df32edf: [AArch64] Lower bitreverse in ISel (authored by Rin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102397/new/

https://reviews.llvm.org/D102397

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-misc.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
  llvm/test/CodeGen/AArch64/bitreverse.ll
  llvm/test/CodeGen/AArch64/neon_rbit.ll

Index: llvm/test/CodeGen/AArch64/neon_rbit.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon_rbit.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
+
+; The llvm.aarch64_neon_rbit intrinsic should be auto-upgraded to the
+; target-independent bitreverse intrinsic.
+
+declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
+
+define <8 x i8> @rbit_8x8(<8 x i8> %A) nounwind {
+; CHECK-LABEL: rbit_8x8:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit v0.8b, v0.8b
+; CHECK-NEXT:ret
+%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %A)
+	ret <8 x i8> %tmp3
+}
+
+declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
+
+define <16 x i8> @rbit_16x8(<16 x i8> %A) nounwind {
+; CHECK-LABEL: rbit_16x8:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rbit v0.16b, v0.16b
+; CHECK-NEXT:ret
+%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %A)
+	ret <16 x i8> %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.rbit.v4i16(<4 x i16>) nounwind readnone
+
+define <4 x i16> @rbit_4x16(<4 x i16> %A) nounwind {
+; CHECK-LABEL: rbit_4x16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev16 v0.8b, v0.8b
+; CHECK-NEXT:rbit v0.8b, v0.8b
+; CHECK-NEXT:ret
+%tmp3 = call <4 x i16> @llvm.aarch64.neon.rbit.v4i16(<4 x i16> %A)
+	ret <4 x i16> %tmp3
+}
+
+declare <8 x i16> @llvm.aarch64.neon.rbit.v8i16(<8 x i16>) nounwind readnone
+
+define <8 x i16> @rbit_8x16(<8 x i16> %A) {
+; CHECK-LABEL: rbit_8x16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev16 v0.16b, v0.16b
+; CHECK-NEXT:rbit v0.16b, v0.16b
+; CHECK-NEXT:ret
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.rbit.v8i16(<8 x i16> %A)
+  ret <8 x i16> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32>) nounwind readnone
+
+define <2 x i32> @rbit_2x32(<2 x i32> %A) {
+; CHECK-LABEL: rbit_2x32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:// kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:fmov w8, s0
+; CHECK-NEXT:rbit w8, w8
+; CHECK-NEXT:mov w9, v0.s[1]
+; CHECK-NEXT:fmov s0, w8
+; CHECK-NEXT:rbit w8, w9
+; CHECK-NEXT:mov v0.s[1], w8
+; CHECK-NEXT:// kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:ret
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32> %A)
+  ret <2 x i32> %tmp3
+}
+
+declare <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32>) nounwind readnone
+
+define <4 x i32> @rbit_4x32(<4 x i32> %A) {
+; CHECK-LABEL: rbit_4x32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:fmov w10, s0
+; CHECK-NEXT:mov w8, v0.s[1]
+; CHECK-NEXT:rbit w10, w10
+; CHECK-NEXT:mov w9, v0.s[2]
+; CHECK-NEXT:mov w11, v0.s[3]
+; CHECK-NEXT:fmov s0, w10
+; CHECK-NEXT:rbit w8, w8
+; CHECK-NEXT:rbit w9, w9
+; CHECK-NEXT:mov v0.s[1], w8
+; CHECK-NEXT:mov v0.s[2], w9
+; CHECK-NEXT:rbit w8, w11
+; CHECK-NEXT:mov v0.s[3], w8
+; CHECK-NEXT:ret
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32> %A)
+  ret <4 x i32> %tmp3
+}
+
+declare <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64>) readnone
+
+define <1 x i64> @rbit_1x64(<1 x i64> %A) {
+; CHECK-LABEL: rbit_1x64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:// kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:fmov x8, d0
+; CHECK-NEXT:rbit x8, x8
+; CHECK-NEXT:fmov d0, x8
+; CHECK-NEXT:ret
+  %tmp3 = call <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64> %A)
+  ret <1 x i64> %tmp3
+}
+
+declare <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64>) readnone
+
+define <2 x i64> @rbit_2x64(<2 x i64> %A) {
+; CHECK-LABEL: rbit_2x64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:fmov x8, d0
+; CHECK-NEXT:rbit x8, x8
+; CHECK-NEXT:mov x9, v0.d[1]
+; CHECK-NEXT:fmov d0, x8
+; CHECK-NEXT:rbit x8, x9
+; CHECK-NEXT:mov v0.d[1], x8
+; CHECK-NEXT:ret
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64> %A)
+  ret <2 x i64> %tmp3
+}
Index: llvm/test/CodeGen/AArch64/bitreverse.ll
===
--- llvm/test/CodeGen/AArch64/bitreverse.ll
+++ llvm/test/CodeGen/AArch64/bitreverse.ll
@@ -1,3 +1,4 @@
+; NOT