kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, david-arm.
Herald added subscribers: psnobl, rkruppe, hiraditya, tschuett.
Herald added a project: LLVM.

This patch adds legalisation of extensions where the operand
of the extend is a legal scalable type but the result is not.

In these cases we can try to use the [S|U]UNPK[HI|LO] operations
to extend each half individually and concatenate the results.

For example:

  zext <vscale x 16 x i8> %a to <vscale x 16 x i16>

should emit:

  uunpklo z2.h, z0.b
  uunpkhi z1.h, z0.b

Patch by Richard Sandiford


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D79587

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
  llvm/test/CodeGen/AArch64/sve-arith.ll
  llvm/test/CodeGen/AArch64/sve-ext.ll

Index: llvm/test/CodeGen/AArch64/sve-ext.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-ext.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SEXT
+;
+
+define <vscale x 16 x i16> @sext_b_to_h(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_b_to_h:
+; CHECK-DAG: sunpklo z2.h, z0.b
+; CHECK-DAG: sunpkhi z1.h, z0.b
+; CHECK-DAG: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
+  ret <vscale x 16 x i16> %ext
+}
+
+define <vscale x 8 x i32> @sext_h_to_s(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sext_h_to_s:
+; CHECK-DAG: sunpklo z2.s, z0.h
+; CHECK-DAG: sunpkhi z1.s, z0.h
+; CHECK-DAG: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %ext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
+  ret <vscale x 8 x i32> %ext
+}
+
+define <vscale x 4 x i64> @sext_s_to_d(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sext_s_to_d:
+; CHECK-DAG: sunpklo z2.d, z0.s
+; CHECK-DAG: sunpkhi z1.d, z0.s
+; CHECK-DAG: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %ext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %ext
+}
+
+define <vscale x 16 x i32> @sext_b_to_s(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_b_to_s:
+; CHECK-DAG: sunpklo [[LO:z[0-9]+]].h, z0.b
+; CHECK-DAG: sunpkhi [[HI:z[0-9]+]].h, z0.b
+; CHECK-DAG: sunpklo [[LOLO:z[0-9]+]].s, [[LO]].h
+; CHECK-DAG: sunpkhi {{z[0-9]+}}.s, [[LO]].h
+; CHECK-DAG: sunpklo {{z[0-9]+}}.s, [[HI]].h
+; CHECK-DAG: sunpkhi {{z[0-9]+}}.s, [[HI]].h
+; CHECK: ret
+  %ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
+  ret <vscale x 16 x i32> %ext
+}
+
+define <vscale x 4 x i16> @sext_promote_b_to_s(<vscale x 4 x i8> %in) {
+; CHECK-LABEL: sext_promote_b_to_s:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+  %out = sext <vscale x 4 x i8> %in to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %out
+}
+
+define <vscale x 2 x i32> @sext_promote_h_to_d(<vscale x 2 x i16> %in) {
+; CHECK-LABEL: @sext_promote_h_to_d
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+  %out = sext <vscale x 2 x i16> %in to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %out
+}
+
+; ZEXT
+
+define <vscale x 16 x i16> @zext_b_to_h(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_b_to_h:
+; CHECK-DAG: uunpklo z2.h, z0.b
+; CHECK-DAG: uunpkhi z1.h, z0.b
+; CHECK-DAG: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
+  ret <vscale x 16 x i16> %ext
+}
+
+define <vscale x 8 x i32> @zext_h_to_s(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: zext_h_to_s:
+; CHECK-DAG: uunpklo z2.s, z0.h
+; CHECK-DAG: uunpkhi z1.s, z0.h
+; CHECK-DAG: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %ext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
+  ret <vscale x 8 x i32> %ext
+}
+
+define <vscale x 4 x i64> @zext_s_to_d(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: zext_s_to_d:
+; CHECK-DAG: uunpklo z2.d, z0.s
+; CHECK-DAG: uunpkhi z1.d, z0.s
+; CHECK-DAG: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %ext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %ext
+}
+
+define <vscale x 16 x i32> @zext_b_to_s(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_b_to_s:
+; CHECK-DAG: uunpklo [[LO:z[0-9]+]].h, z0.b
+; CHECK-DAG: uunpkhi [[HI:z[0-9]+]].h, z0.b
+; CHECK-DAG: uunpklo z0.s, [[LO]].h
+; CHECK-DAG: uunpkhi z1.s, [[LO]].h
+; CHECK-DAG: uunpklo z2.s, [[HI]].h
+; CHECK-DAG: uunpkhi z3.s, [[HI]].h
+; CHECK: ret
+  %ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
+  ret <vscale x 16 x i32> %ext
+}
+
+define <vscale x 4 x i16> @zext_promote_b_to_s(<vscale x 4 x i8> %in) {
+; CHECK-LABEL: zext_promote_b_to_s:
+; CHECK-DAG: and z0.s, z0.s, #0xff
+; CHECK-NEXT: ret
+  %out = zext <vscale x 4 x i8> %in to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %out
+}
+
+define <vscale x 2 x i32> @zext_promote_h_to_d(<vscale x 2 x i16> %in) {
+; CHECK-LABEL: @zext_promote_h_to_d
+; CHECK-DAG: and z0.d, z0.d, #0xffff
+; CHECK-NEXT: ret
+  %out = zext <vscale x 2 x i16> %in to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %out
+}
Index: llvm/test/CodeGen/AArch64/sve-arith.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-arith.ll
@@ -0,0 +1,608 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SDIV
+;
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @sdiv_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @sdiv_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %div
+}
+
+define <vscale x 8 x i32> @sdiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @sdiv_split_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @sdiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @sdiv_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sxtw z1.d, p0/m, z1.d
+; CHECK-DAG: sxtw z0.d, p0/m, z0.d
+; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %div
+}
+
+define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
+; CHECK-LABEL: @sdiv_split_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 4 x i64> %a, %b
+  ret <vscale x 4 x i64> %div
+}
+
+;
+; UDIV
+;
+
+define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @udiv_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @udiv_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %div
+}
+
+define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @udiv_split_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @udiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @udiv_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: and z1.d, z1.d, #0xffffffff
+; CHECK-DAG: and z0.d, z0.d, #0xffffffff
+; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %div
+}
+
+define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
+; CHECK-LABEL: @udiv_split_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: udiv z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 4 x i64> %a, %b
+  ret <vscale x 4 x i64> %div
+}
+
+;
+; SMIN
+;
+
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: @smin_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: smin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 16 x i8> %a, %b
+  %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %min
+}
+
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: @smin_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 8 x i16> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %min
+}
+
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: smin_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 4 x i32> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %min
+}
+
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: smin_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 2 x i64> %a, %b
+  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %min
+}
+
+define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) {
+; CHECK-LABEL: @smin_split_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: smin z0.b, p0/m, z0.b, z2.b
+; CHECK-DAG: smin z1.b, p0/m, z1.b, z3.b
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 32 x i8> %a, %b
+  %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b
+  ret <vscale x 32 x i8> %min
+}
+
+define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) {
+; CHECK-LABEL: smin_split_i16:
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: smin z0.h, p0/m, z0.h, z4.h
+; CHECK-DAG: smin z1.h, p0/m, z1.h, z5.h
+; CHECK-DAG: smin z2.h, p0/m, z2.h, z6.h
+; CHECK-DAG: smin z3.h, p0/m, z3.h, z7.h
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 32 x i16> %a, %b
+  %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b
+  ret <vscale x 32 x i16> %min
+}
+
+define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+; CHECK-LABEL: smin_split_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smin z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: smin z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 8 x i32> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
+  ret <vscale x 8 x i32> %min
+}
+
+define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+; CHECK-LABEL: smin_split_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: smin z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: smin z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 4 x i64> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
+  ret <vscale x 4 x i64> %min
+}
+
+define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+; CHECK-LABEL: @smin_promote_i8
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: sxtb z1.h, p0/m, z1.h
+; CHECK-DAG: sxtb z0.h, p0/m, z0.h
+; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 8 x i8> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
+  ret <vscale x 8 x i8> %min
+}
+
+define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+; CHECK-LABEL: @smin_promote_i16
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: sxth z1.s, p0/m, z1.s
+; CHECK-DAG: sxth z0.s, p0/m, z0.s
+; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 4 x i16> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
+  ret <vscale x 4 x i16> %min
+}
+
+define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+; CHECK-LABEL: @smin_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sxtw z1.d, p0/m, z1.d
+; CHECK-DAG: sxtw z0.d, p0/m, z0.d
+; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 2 x i32> %a, %b
+  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
+  ret <vscale x 2 x i32> %min
+}
+
+;
+; UMIN
+;
+
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: @umin_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: umin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 16 x i8> %a, %b
+  %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %min
+}
+
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: @umin_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 8 x i16> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %min
+}
+
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: umin_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 4 x i32> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %min
+}
+
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: umin_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 2 x i64> %a, %b
+  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %min
+}
+
+define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+; CHECK-LABEL: umin_split_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: umin z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: umin z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 4 x i64> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
+  ret <vscale x 4 x i64> %min
+}
+
+define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+; CHECK-LABEL: @umin_promote_i8
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 8 x i8> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
+  ret <vscale x 8 x i8> %min
+}
+
+;
+; SMAX
+;
+
+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: @smax_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 16 x i8> %a, %b
+  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %max
+}
+
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: @smax_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 8 x i16> %a, %b
+  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %max
+}
+
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: smax_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 4 x i32> %a, %b
+  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %max
+}
+
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: smax_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 2 x i64> %a, %b
+  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %max
+}
+
+define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+; CHECK-LABEL: smax_split_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smax z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: smax z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 8 x i32> %a, %b
+  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
+  ret <vscale x 8 x i32> %max
+}
+
+define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+; CHECK-LABEL: @smax_promote_i16
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 4 x i16> %a, %b
+  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
+  ret <vscale x 4 x i16> %max
+}
+
+;
+; UMAX
+;
+
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: @umax_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 16 x i8> %a, %b
+  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %max
+}
+
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: @umax_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 8 x i16> %a, %b
+  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %max
+}
+
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: umax_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 4 x i32> %a, %b
+  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %max
+}
+
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: umax_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 2 x i64> %a, %b
+  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %max
+}
+
+define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) {
+; CHECK-LABEL: umax_split_i16:
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: umax z0.h, p0/m, z0.h, z2.h
+; CHECK-DAG: umax z1.h, p0/m, z1.h, z3.h
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 16 x i16> %a, %b
+  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b
+  ret <vscale x 16 x i16> %max
+}
+
+define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+; CHECK-LABEL: @umax_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 2 x i32> %a, %b
+  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
+  ret <vscale x 2 x i32> %max
+}
+
+;
+; ASR
+;
+
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: @asr_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %shr
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: @asr_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: @asr_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %shr
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: @asr_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %shr
+}
+
+define <vscale x 16 x i16> @asr_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b){
+; CHECK-LABEL: @asr_split_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: asr z0.h, p0/m, z0.h, z2.h
+; CHECK-DAG: asr z1.h, p0/m, z1.h, z3.h
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 16 x i16> %a, %b
+  ret <vscale x 16 x i16> %shr
+}
+
+define <vscale x 2 x i32> @asr_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b){
+; CHECK-LABEL: @asr_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: and z1.d, z1.d, #0xffffffff
+; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %shr
+}
+
+;
+; LSL
+;
+
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: @lsl_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %shl
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: @lsl_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %shl
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: @lsl_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %shl
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: @lsl_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %shl
+}
+
+define <vscale x 4 x i64> @lsl_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){
+; CHECK-LABEL: @lsl_split_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: lsl z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: lsl z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 4 x i64> %a, %b
+  ret <vscale x 4 x i64> %shl
+}
+
+define <vscale x 4 x i16> @lsl_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b){
+; CHECK-LABEL: @lsl_promote_i16
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: and z1.s, z1.s, #0xffff
+; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 4 x i16> %a, %b
+  ret <vscale x 4 x i16> %shl
+}
+
+;
+; LSR
+;
+
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: @lsr_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %shr
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: @lsr_i16
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: @lsr_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %shr
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: @lsr_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %shr
+}
+
+define <vscale x 8 x i8> @lsr_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b){
+; CHECK-LABEL: @lsr_promote_i8
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: and z1.h, z1.h, #0xff
+; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 8 x i8> %a, %b
+  ret <vscale x 8 x i8> %shr
+}
+
+define <vscale x 8 x i32> @lsr_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b){
+; CHECK-LABEL: @lsr_split_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: lsr z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: lsr z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %shr
+}
Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ /dev/null
@@ -1,608 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-
-;
-; SDIV
-;
-
-define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: @sdiv_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %div = sdiv <vscale x 4 x i32> %a, %b
-  ret <vscale x 4 x i32> %div
-}
-
-define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: @sdiv_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %div = sdiv <vscale x 2 x i64> %a, %b
-  ret <vscale x 2 x i64> %div
-}
-
-define <vscale x 8 x i32> @sdiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
-; CHECK-LABEL: @sdiv_split_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s
-; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: ret
-  %div = sdiv <vscale x 8 x i32> %a, %b
-  ret <vscale x 8 x i32> %div
-}
-
-define <vscale x 2 x i32> @sdiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
-; CHECK-LABEL: @sdiv_promote_i32
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: sxtw z1.d, p0/m, z1.d
-; CHECK-DAG: sxtw z0.d, p0/m, z0.d
-; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %div = sdiv <vscale x 2 x i32> %a, %b
-  ret <vscale x 2 x i32> %div
-}
-
-define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
-; CHECK-LABEL: @sdiv_split_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d
-; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: ret
-  %div = sdiv <vscale x 4 x i64> %a, %b
-  ret <vscale x 4 x i64> %div
-}
-
-;
-; UDIV
-;
-
-define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: @udiv_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %div = udiv <vscale x 4 x i32> %a, %b
-  ret <vscale x 4 x i32> %div
-}
-
-define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: @udiv_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %div = udiv <vscale x 2 x i64> %a, %b
-  ret <vscale x 2 x i64> %div
-}
-
-define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
-; CHECK-LABEL: @udiv_split_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s
-; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: ret
-  %div = udiv <vscale x 8 x i32> %a, %b
-  ret <vscale x 8 x i32> %div
-}
-
-define <vscale x 2 x i32> @udiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
-; CHECK-LABEL: @udiv_promote_i32
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: and z1.d, z1.d, #0xffffffff
-; CHECK-DAG: and z0.d, z0.d, #0xffffffff
-; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %div = udiv <vscale x 2 x i32> %a, %b
-  ret <vscale x 2 x i32> %div
-}
-
-define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
-; CHECK-LABEL: @udiv_split_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: udiv z0.d, p0/m, z0.d, z2.d
-; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: ret
-  %div = udiv <vscale x 4 x i64> %a, %b
-  ret <vscale x 4 x i64> %div
-}
-
-;
-; SMIN
-;
-
-define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-; CHECK-LABEL: @smin_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: smin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 16 x i8> %a, %b
-  %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
-  ret <vscale x 16 x i8> %min
-}
-
-define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
-; CHECK-LABEL: @smin_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 8 x i16> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
-  ret <vscale x 8 x i16> %min
-}
-
-define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
-; CHECK-LABEL: smin_i32:
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 4 x i32> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
-  ret <vscale x 4 x i32> %min
-}
-
-define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
-; CHECK-LABEL: smin_i64:
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 2 x i64> %a, %b
-  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
-  ret <vscale x 2 x i64> %min
-}
-
-define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) {
-; CHECK-LABEL: @smin_split_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: smin z0.b, p0/m, z0.b, z2.b
-; CHECK-DAG: smin z1.b, p0/m, z1.b, z3.b
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 32 x i8> %a, %b
-  %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b
-  ret <vscale x 32 x i8> %min
-}
-
-define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) {
-; CHECK-LABEL: smin_split_i16:
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: smin z0.h, p0/m, z0.h, z4.h
-; CHECK-DAG: smin z1.h, p0/m, z1.h, z5.h
-; CHECK-DAG: smin z2.h, p0/m, z2.h, z6.h
-; CHECK-DAG: smin z3.h, p0/m, z3.h, z7.h
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 32 x i16> %a, %b
-  %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b
-  ret <vscale x 32 x i16> %min
-}
-
-define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
-; CHECK-LABEL: smin_split_i32:
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: smin z0.s, p0/m, z0.s, z2.s
-; CHECK-DAG: smin z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 8 x i32> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
-  ret <vscale x 8 x i32> %min
-}
-
-define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
-; CHECK-LABEL: smin_split_i64:
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: smin z0.d, p0/m, z0.d, z2.d
-; CHECK-DAG: smin z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 4 x i64> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
-  ret <vscale x 4 x i64> %min
-}
-
-define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
-; CHECK-LABEL: @smin_promote_i8
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: sxtb z1.h, p0/m, z1.h
-; CHECK-DAG: sxtb z0.h, p0/m, z0.h
-; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 8 x i8> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
-  ret <vscale x 8 x i8> %min
-}
-
-define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
-; CHECK-LABEL: @smin_promote_i16
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: sxth z1.s, p0/m, z1.s
-; CHECK-DAG: sxth z0.s, p0/m, z0.s
-; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 4 x i16> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
-  ret <vscale x 4 x i16> %min
-}
-
-define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
-; CHECK-LABEL: @smin_promote_i32
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: sxtw z1.d, p0/m, z1.d
-; CHECK-DAG: sxtw z0.d, p0/m, z0.d
-; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %cmp = icmp slt <vscale x 2 x i32> %a, %b
-  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
-  ret <vscale x 2 x i32> %min
-}
-
-;
-; UMIN
-;
-
-define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-; CHECK-LABEL: @umin_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: umin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %cmp = icmp ult <vscale x 16 x i8> %a, %b
-  %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
-  ret <vscale x 16 x i8> %min
-}
-
-define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
-; CHECK-LABEL: @umin_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %cmp = icmp ult <vscale x 8 x i16> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
-  ret <vscale x 8 x i16> %min
-}
-
-define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
-; CHECK-LABEL: umin_i32:
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: umin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %cmp = icmp ult <vscale x 4 x i32> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
-  ret <vscale x 4 x i32> %min
-}
-
-define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
-; CHECK-LABEL: umin_i64:
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %cmp = icmp ult <vscale x 2 x i64> %a, %b
-  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
-  ret <vscale x 2 x i64> %min
-}
-
-define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
-; CHECK-LABEL: umin_split_i64:
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: umin z0.d, p0/m, z0.d, z2.d
-; CHECK-DAG: umin z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: ret
-  %cmp = icmp ult <vscale x 4 x i64> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
-  ret <vscale x 4 x i64> %min
-}
-
-define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
-; CHECK-LABEL: @umin_promote_i8
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %cmp = icmp ult <vscale x 8 x i8> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
-  ret <vscale x 8 x i8> %min
-}
-
-;
-; SMAX
-;
-
-define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-; CHECK-LABEL: @smax_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %cmp = icmp sgt <vscale x 16 x i8> %a, %b
-  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
-  ret <vscale x 16 x i8> %max
-}
-
-define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
-; CHECK-LABEL: @smax_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %cmp = icmp sgt <vscale x 8 x i16> %a, %b
-  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
-  ret <vscale x 8 x i16> %max
-}
-
-define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
-; CHECK-LABEL: smax_i32:
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %cmp = icmp sgt <vscale x 4 x i32> %a, %b
-  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
-  ret <vscale x 4 x i32> %max
-}
-
-define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
-; CHECK-LABEL: smax_i64:
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %cmp = icmp sgt <vscale x 2 x i64> %a, %b
-  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
-  ret <vscale x 2 x i64> %max
-}
-
-define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
-; CHECK-LABEL: smax_split_i32:
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: smax z0.s, p0/m, z0.s, z2.s
-; CHECK-DAG: smax z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: ret
-  %cmp = icmp sgt <vscale x 8 x i32> %a, %b
-  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
-  ret <vscale x 8 x i32> %max
-}
-
-define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
-; CHECK-LABEL: @smax_promote_i16
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %cmp = icmp sgt <vscale x 4 x i16> %a, %b
-  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
-  ret <vscale x 4 x i16> %max
-}
-
-;
-; UMAX
-;
-
-define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-; CHECK-LABEL: @umax_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %cmp = icmp ugt <vscale x 16 x i8> %a, %b
-  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
-  ret <vscale x 16 x i8> %max
-}
-
-define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
-; CHECK-LABEL: @umax_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %cmp = icmp ugt <vscale x 8 x i16> %a, %b
-  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
-  ret <vscale x 8 x i16> %max
-}
-
-define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
-; CHECK-LABEL: umax_i32:
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %cmp = icmp ugt <vscale x 4 x i32> %a, %b
-  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
-  ret <vscale x 4 x i32> %max
-}
-
-define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
-; CHECK-LABEL: umax_i64:
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %cmp = icmp ugt <vscale x 2 x i64> %a, %b
-  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
-  ret <vscale x 2 x i64> %max
-}
-
-define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) {
-; CHECK-LABEL: umax_split_i16:
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: umax z0.h, p0/m, z0.h, z2.h
-; CHECK-DAG: umax z1.h, p0/m, z1.h, z3.h
-; CHECK-NEXT: ret
-  %cmp = icmp ugt <vscale x 16 x i16> %a, %b
-  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b
-  ret <vscale x 16 x i16> %max
-}
-
-define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
-; CHECK-LABEL: @umax_promote_i32
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %cmp = icmp ugt <vscale x 2 x i32> %a, %b
-  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
-  ret <vscale x 2 x i32> %max
-}
-
-;
-; ASR
-;
-
-define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
-; CHECK-LABEL: @asr_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %shr = ashr <vscale x 16 x i8> %a, %b
-  ret <vscale x 16 x i8> %shr
-}
-
-define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
-; CHECK-LABEL: @asr_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %shr = ashr <vscale x 8 x i16> %a, %b
-  ret <vscale x 8 x i16> %shr
-}
-
-define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
-; CHECK-LABEL: @asr_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %shr = ashr <vscale x 4 x i32> %a, %b
-  ret <vscale x 4 x i32> %shr
-}
-
-define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
-; CHECK-LABEL: @asr_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %shr = ashr <vscale x 2 x i64> %a, %b
-  ret <vscale x 2 x i64> %shr
-}
-
-define <vscale x 16 x i16> @asr_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b){
-; CHECK-LABEL: @asr_split_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: asr z0.h, p0/m, z0.h, z2.h
-; CHECK-DAG: asr z1.h, p0/m, z1.h, z3.h
-; CHECK-NEXT: ret
-  %shr = ashr <vscale x 16 x i16> %a, %b
-  ret <vscale x 16 x i16> %shr
-}
-
-define <vscale x 2 x i32> @asr_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b){
-; CHECK-LABEL: @asr_promote_i32
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: and z1.d, z1.d, #0xffffffff
-; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %shr = ashr <vscale x 2 x i32> %a, %b
-  ret <vscale x 2 x i32> %shr
-}
-
-;
-; LSL
-;
-
-define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
-; CHECK-LABEL: @lsl_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %shl = shl <vscale x 16 x i8> %a, %b
-  ret <vscale x 16 x i8> %shl
-}
-
-define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
-; CHECK-LABEL: @lsl_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %shl = shl <vscale x 8 x i16> %a, %b
-  ret <vscale x 8 x i16> %shl
-}
-
-define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
-; CHECK-LABEL: @lsl_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %shl = shl <vscale x 4 x i32> %a, %b
-  ret <vscale x 4 x i32> %shl
-}
-
-define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
-; CHECK-LABEL: @lsl_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %shl = shl <vscale x 2 x i64> %a, %b
-  ret <vscale x 2 x i64> %shl
-}
-
-define <vscale x 4 x i64> @lsl_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){
-; CHECK-LABEL: @lsl_split_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: lsl z0.d, p0/m, z0.d, z2.d
-; CHECK-DAG: lsl z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: ret
-  %shl = shl <vscale x 4 x i64> %a, %b
-  ret <vscale x 4 x i64> %shl
-}
-
-define <vscale x 4 x i16> @lsl_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b){
-; CHECK-LABEL: @lsl_promote_i16
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: and z1.s, z1.s, #0xffff
-; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %shl = shl <vscale x 4 x i16> %a, %b
-  ret <vscale x 4 x i16> %shl
-}
-
-;
-; LSR
-;
-
-define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
-; CHECK-LABEL: @lsr_i8
-; CHECK-DAG: ptrue p0.b
-; CHECK-DAG: lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-  %shr = lshr <vscale x 16 x i8> %a, %b
-  ret <vscale x 16 x i8> %shr
-}
-
-define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
-; CHECK-LABEL: @lsr_i16
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %shr = lshr <vscale x 8 x i16> %a, %b
-  ret <vscale x 8 x i16> %shr
-}
-
-define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
-; CHECK-LABEL: @lsr_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %shr = lshr <vscale x 4 x i32> %a, %b
-  ret <vscale x 4 x i32> %shr
-}
-
-define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
-; CHECK-LABEL: @lsr_i64
-; CHECK-DAG: ptrue p0.d
-; CHECK-DAG: lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %shr = lshr <vscale x 2 x i64> %a, %b
-  ret <vscale x 2 x i64> %shr
-}
-
-define <vscale x 8 x i8> @lsr_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: @lsr_promote_i8
-; CHECK-DAG: ptrue p0.h
-; CHECK-DAG: and z1.h, z1.h, #0xff
-; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %shr = lshr <vscale x 8 x i8> %a, %b
-  ret <vscale x 8 x i8> %shr
-}
-
-define <vscale x 8 x i32> @lsr_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b){
-; CHECK-LABEL: @lsr_split_i32
-; CHECK-DAG: ptrue p0.s
-; CHECK-DAG: lsr z0.s, p0/m, z0.s, z2.s
-; CHECK-DAG: lsr z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: ret
-  %shr = lshr <vscale x 8 x i32> %a, %b
-  ret <vscale x 8 x i32> %shr
-}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -879,6 +879,10 @@
                                   SDValue &Offset, ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;
 
+  void ReplaceExtensionResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                               SelectionDAG &DAG, unsigned HiOpcode,
+                               unsigned LoOpcode) const;
+
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -890,6 +890,14 @@
         setOperationAction(ISD::SHL, VT, Custom);
         setOperationAction(ISD::SRL, VT, Custom);
         setOperationAction(ISD::SRA, VT, Custom);
+      } else {
+        if (VT.getVectorElementType() != MVT::i1) {
+          // Use UNPK{LO,HI} sequences to lower extensions from legal SVE
+          // types to wider-than-legal types.
+          setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+          setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+          setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+        }
       }
     }
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -3290,6 +3298,15 @@
   default:
     llvm_unreachable("unimplemented operand");
     return SDValue();
+  case ISD::ANY_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+    // Needed because we have selected custom lowering for illegal SVE types.
+    // The cases we actually want to handle are where the operand is legal
+    // and the result isn't, which go through ReplaceNodeResults instead.
+    // This code only sees cases where the result is legal and the operand
+    // isn't.
+    return SDValue();
   case ISD::BITCAST:
     return LowerBITCAST(Op, DAG);
   case ISD::GlobalAddress:
@@ -13609,6 +13626,52 @@
   Results.push_back(SplitVal);
 }
 
+// If the node is an extension from a legal SVE type to something wider,
+// use HiOpcode and LoOpcode to unpack and extend each half individually,
+// then concatenate the results; otherwise leave Results empty so that
+void AArch64TargetLowering::ReplaceExtensionResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
+    unsigned HiOpcode, unsigned LoOpcode) const {
+  SDLoc DL(N);
+  SDValue In = N->getOperand(0);
+  EVT InVT = In.getValueType();
+
+  assert(InVT.isScalableVector() && "Can only lower scalable vectors");
+
+  if (!isTypeLegal(InVT))
+    return;
+
+  EVT InEltVT = InVT.getVectorElementType();
+  auto EltCnt = InVT.getVectorElementCount();
+  unsigned InEltBits = InEltVT.getSizeInBits();
+
+  if (InEltBits != 8 && InEltBits != 16 && InEltBits != 32)
+    return;
+
+  // The result must be at least twice as wide as the input in order for
+  // this to work.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  if (EltVT.getSizeInBits() < InEltBits * 2)
+    return;
+
+  // Extend In to a double-width vector.
+  EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), InEltBits * 2);
+  EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, EltCnt / 2);
+  assert(isTypeLegal(NewVT) && "Extension result should be legal");
+
+  SDValue Lo = DAG.getNode(LoOpcode, DL, NewVT, In);
+  SDValue Hi = DAG.getNode(HiOpcode, DL, NewVT, In);
+
+  // If necessary, extend again using the original opcode.  Such extensions
+  // will also need legalizing, but at least we're making forward progress.
+  NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltCnt / 2);
+  Lo = DAG.getNode(N->getOpcode(), DL, NewVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), DL, NewVT, Hi);
+
+  Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi));
+}
+
 static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
   SDLoc DL(N);
   SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
@@ -13749,6 +13812,15 @@
   case ISD::ATOMIC_CMP_SWAP:
     ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
     return;
+  case ISD::SIGN_EXTEND:
+    ReplaceExtensionResults(N, Results, DAG,
+                            AArch64ISD::SUNPKHI, AArch64ISD::SUNPKLO);
+    return;
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    ReplaceExtensionResults(N, Results, DAG,
+                            AArch64ISD::UUNPKHI, AArch64ISD::UUNPKLO);
+    return;
   case ISD::LOAD: {
     assert(SDValue(N, 0).getValueType() == MVT::i128 &&
            "unexpected load's value type");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to