kmclaughlin created this revision. kmclaughlin added reviewers: sdesmalen, efriedma, david-arm. Herald added subscribers: psnobl, rkruppe, hiraditya, tschuett. Herald added a project: LLVM.
This patch adds legalisation of extensions where the operand of the extend is a legal scalable type but the result is not. In these cases we can try to use the [S|U]UNPK[HI|LO] operations to extend each half individually and concatenate the result. For example: zext <vscale x 16 x i8> %a to <vscale x 16 x i16> should emit: uunpklo z2.h, z0.b uunpkhi z1.h, z0.b Patch by Richard Sandiford Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D79587 Files: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll llvm/test/CodeGen/AArch64/sve-arith.ll llvm/test/CodeGen/AArch64/sve-ext.ll
Index: llvm/test/CodeGen/AArch64/sve-ext.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-ext.ll @@ -0,0 +1,127 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; SEXT +; + +define <vscale x 16 x i16> @sext_b_to_h(<vscale x 16 x i8> %a) { +; CHECK-LABEL: sext_b_to_h: +; CHECK-DAG: sunpklo z2.h, z0.b +; CHECK-DAG: sunpkhi z1.h, z0.b +; CHECK-DAG: mov z0.d, z2.d +; CHECK-NEXT: ret + %ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16> + ret <vscale x 16 x i16> %ext +} + +define <vscale x 8 x i32> @sext_h_to_s(<vscale x 8 x i16> %a) { +; CHECK-LABEL: sext_h_to_s: +; CHECK-DAG: sunpklo z2.s, z0.h +; CHECK-DAG: sunpkhi z1.s, z0.h +; CHECK-DAG: mov z0.d, z2.d +; CHECK-NEXT: ret + %ext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32> + ret <vscale x 8 x i32> %ext +} + +define <vscale x 4 x i64> @sext_s_to_d(<vscale x 4 x i32> %a) { +; CHECK-LABEL: sext_s_to_d: +; CHECK-DAG: sunpklo z2.d, z0.s +; CHECK-DAG: sunpkhi z1.d, z0.s +; CHECK-DAG: mov z0.d, z2.d +; CHECK-NEXT: ret + %ext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64> + ret <vscale x 4 x i64> %ext +} + +define <vscale x 16 x i32> @sext_b_to_s(<vscale x 16 x i8> %a) { +; CHECK-LABEL: sext_b_to_s: +; CHECK-DAG: sunpklo [[LO:z[0-9]+]].h, z0.b +; CHECK-DAG: sunpkhi [[HI:z[0-9]+]].h, z0.b +; CHECK-DAG: sunpklo [[LOLO:z[0-9]+]].s, [[LO]].h +; CHECK-DAG: sunpkhi {{z[0-9]+}}.s, [[LO]].h +; CHECK-DAG: sunpklo {{z[0-9]+}}.s, [[HI]].h +; CHECK-DAG: sunpkhi {{z[0-9]+}}.s, [[HI]].h +; CHECK: ret + %ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32> + ret <vscale x 16 x i32> %ext +} + +define <vscale x 4 x i16> @sext_promote_b_to_s(<vscale x 4 x i8> %in) { +; CHECK-LABEL: @sext_promote_b_to_s +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %out = sext <vscale x 4 x i8> %in to <vscale x 4 x i16> + ret <vscale x 4 x i16> %out +} + +define <vscale x 2 x i32> @sext_promote_h_to_d(<vscale x 2 
x i16> %in) { +; CHECK-LABEL: @sext_promote_h_to_d +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %out = sext <vscale x 2 x i16> %in to <vscale x 2 x i32> + ret <vscale x 2 x i32> %out +} + +; ZEXT + +define <vscale x 16 x i16> @zext_b_to_h(<vscale x 16 x i8> %a) { +; CHECK-LABEL: zext_b_to_h: +; CHECK-DAG: uunpklo z2.h, z0.b +; CHECK-DAG: uunpkhi z1.h, z0.b +; CHECK-DAG: mov z0.d, z2.d +; CHECK-NEXT: ret + %ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16> + ret <vscale x 16 x i16> %ext +} + +define <vscale x 8 x i32> @zext_h_to_s(<vscale x 8 x i16> %a) { +; CHECK-LABEL: zext_h_to_s: +; CHECK-DAG: uunpklo z2.s, z0.h +; CHECK-DAG: uunpkhi z1.s, z0.h +; CHECK-DAG: mov z0.d, z2.d +; CHECK-NEXT: ret + %ext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32> + ret <vscale x 8 x i32> %ext +} + +define <vscale x 4 x i64> @zext_s_to_d(<vscale x 4 x i32> %a) { +; CHECK-LABEL: zext_s_to_d: +; CHECK-DAG: uunpklo z2.d, z0.s +; CHECK-DAG: uunpkhi z1.d, z0.s +; CHECK-DAG: mov z0.d, z2.d +; CHECK-NEXT: ret + %ext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64> + ret <vscale x 4 x i64> %ext +} + +define <vscale x 16 x i32> @zext_b_to_s(<vscale x 16 x i8> %a) { +; CHECK-LABEL: zext_b_to_s: +; CHECK-DAG: uunpklo [[LO:z[0-9]+]].h, z0.b +; CHECK-DAG: uunpkhi [[HI:z[0-9]+]].h, z0.b +; CHECK-DAG: uunpklo z0.s, [[LO]].h +; CHECK-DAG: uunpkhi z1.s, [[LO]].h +; CHECK-DAG: uunpklo z2.s, [[HI]].h +; CHECK-DAG: uunpkhi z3.s, [[HI]].h +; CHECK: ret + %ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32> + ret <vscale x 16 x i32> %ext +} + +define <vscale x 4 x i16> @zext_promote_b_to_s(<vscale x 4 x i8> %in) { +; CHECK-LABEL: @zext_promote_b_to_s +; CHECK-DAG: and z0.s, z0.s, #0xff +; CHECK-NEXT: ret + %out = zext <vscale x 4 x i8> %in to <vscale x 4 x i16> + ret <vscale x 4 x i16> %out +} + +define <vscale x 2 x i32> @zext_promote_h_to_d(<vscale x 2 x i16> %in) { +; CHECK-LABEL: @zext_promote_h_to_d +; CHECK-DAG: and z0.d, z0.d, #0xffff +; CHECK-NEXT: 
ret + %out = zext <vscale x 2 x i16> %in to <vscale x 2 x i32> + ret <vscale x 2 x i32> %out +} Index: llvm/test/CodeGen/AArch64/sve-arith.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-arith.ll @@ -0,0 +1,608 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; SDIV +; + +define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: @sdiv_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %div = sdiv <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %div +} + +define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: @sdiv_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %div = sdiv <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %div +} + +define <vscale x 8 x i32> @sdiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) { +; CHECK-LABEL: @sdiv_split_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s +; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %div = sdiv <vscale x 8 x i32> %a, %b + ret <vscale x 8 x i32> %div +} + +define <vscale x 2 x i32> @sdiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) { +; CHECK-LABEL: @sdiv_promote_i32 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: sxtw z1.d, p0/m, z1.d +; CHECK-DAG: sxtw z0.d, p0/m, z0.d +; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %div = sdiv <vscale x 2 x i32> %a, %b + ret <vscale x 2 x i32> %div +} + +define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) { +; CHECK-LABEL: @sdiv_split_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d +; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %div = sdiv <vscale x 4 x i64> %a, %b + ret <vscale x 4 x i64> %div +} + +; +; UDIV +; + +define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x 
i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: @udiv_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %div = udiv <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %div +} + +define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: @udiv_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %div = udiv <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %div +} + +define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) { +; CHECK-LABEL: @udiv_split_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s +; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %div = udiv <vscale x 8 x i32> %a, %b + ret <vscale x 8 x i32> %div +} + +define <vscale x 2 x i32> @udiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) { +; CHECK-LABEL: @udiv_promote_i32 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: and z1.d, z1.d, #0xffffffff +; CHECK-DAG: and z0.d, z0.d, #0xffffffff +; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %div = udiv <vscale x 2 x i32> %a, %b + ret <vscale x 2 x i32> %div +} + +define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) { +; CHECK-LABEL: @udiv_split_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: udiv z0.d, p0/m, z0.d, z2.d +; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %div = udiv <vscale x 4 x i64> %a, %b + ret <vscale x 4 x i64> %div +} + +; +; SMIN +; + +define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: @smin_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 16 x i8> %a, %b + %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b + ret <vscale x 16 x i8> %min +} + +define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, 
<vscale x 8 x i16> %c) { +; CHECK-LABEL: @smin_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 8 x i16> %a, %b + %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b + ret <vscale x 8 x i16> %min +} + +define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: smin_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 4 x i32> %a, %b + %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b + ret <vscale x 4 x i32> %min +} + +define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: smin_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 2 x i64> %a, %b + %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b + ret <vscale x 2 x i64> %min +} + +define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) { +; CHECK-LABEL: @smin_split_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: smin z0.b, p0/m, z0.b, z2.b +; CHECK-DAG: smin z1.b, p0/m, z1.b, z3.b +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 32 x i8> %a, %b + %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b + ret <vscale x 32 x i8> %min +} + +define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) { +; CHECK-LABEL: smin_split_i16: +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: smin z0.h, p0/m, z0.h, z4.h +; CHECK-DAG: smin z1.h, p0/m, z1.h, z5.h +; CHECK-DAG: smin z2.h, p0/m, z2.h, z6.h +; CHECK-DAG: smin z3.h, p0/m, z3.h, z7.h +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 32 x i16> %a, %b + %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i16> %a, <vscale x 32 x i16> 
%b + ret <vscale x 32 x i16> %min +} + +define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) { +; CHECK-LABEL: smin_split_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smin z0.s, p0/m, z0.s, z2.s +; CHECK-DAG: smin z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 8 x i32> %a, %b + %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b + ret <vscale x 8 x i32> %min +} + +define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) { +; CHECK-LABEL: smin_split_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: smin z0.d, p0/m, z0.d, z2.d +; CHECK-DAG: smin z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 4 x i64> %a, %b + %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b + ret <vscale x 4 x i64> %min +} + +define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) { +; CHECK-LABEL: @smin_promote_i8 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: sxtb z1.h, p0/m, z1.h +; CHECK-DAG: sxtb z0.h, p0/m, z0.h +; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 8 x i8> %a, %b + %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b + ret <vscale x 8 x i8> %min +} + +define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) { +; CHECK-LABEL: @smin_promote_i16 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: sxth z1.s, p0/m, z1.s +; CHECK-DAG: sxth z0.s, p0/m, z0.s +; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 4 x i16> %a, %b + %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b + ret <vscale x 4 x i16> %min +} + +define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) { +; CHECK-LABEL: @smin_promote_i32 
+; CHECK-DAG: ptrue p0.d +; CHECK-DAG: sxtw z1.d, p0/m, z1.d +; CHECK-DAG: sxtw z0.d, p0/m, z0.d +; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp slt <vscale x 2 x i32> %a, %b + %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b + ret <vscale x 2 x i32> %min +} + +; +; UMIN +; + +define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: @umin_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp ult <vscale x 16 x i8> %a, %b + %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b + ret <vscale x 16 x i8> %min +} + +define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: @umin_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp ult <vscale x 8 x i16> %a, %b + %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b + ret <vscale x 8 x i16> %min +} + +define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: umin_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp ult <vscale x 4 x i32> %a, %b + %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b + ret <vscale x 4 x i32> %min +} + +define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: umin_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp ult <vscale x 2 x i64> %a, %b + %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b + ret <vscale x 2 x i64> %min +} + +define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) { +; 
CHECK-LABEL: umin_split_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: umin z0.d, p0/m, z0.d, z2.d +; CHECK-DAG: umin z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %cmp = icmp ult <vscale x 4 x i64> %a, %b + %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b + ret <vscale x 4 x i64> %min +} + +define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) { +; CHECK-LABEL: @umin_promote_i8 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp ult <vscale x 8 x i8> %a, %b + %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b + ret <vscale x 8 x i8> %min +} + +; +; SMAX +; + +define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: @smax_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp sgt <vscale x 16 x i8> %a, %b + %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b + ret <vscale x 16 x i8> %max +} + +define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: @smax_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp sgt <vscale x 8 x i16> %a, %b + %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b + ret <vscale x 8 x i16> %max +} + +define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: smax_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp sgt <vscale x 4 x i32> %a, %b + %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b + ret <vscale x 4 x i32> %max +} + +define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; 
CHECK-LABEL: smax_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp sgt <vscale x 2 x i64> %a, %b + %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b + ret <vscale x 2 x i64> %max +} + +define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) { +; CHECK-LABEL: smax_split_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smax z0.s, p0/m, z0.s, z2.s +; CHECK-DAG: smax z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %cmp = icmp sgt <vscale x 8 x i32> %a, %b + %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b + ret <vscale x 8 x i32> %max +} + +define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) { +; CHECK-LABEL: @smax_promote_i16 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp sgt <vscale x 4 x i16> %a, %b + %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b + ret <vscale x 4 x i16> %max +} + +; +; UMAX +; + +define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: @umax_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp ugt <vscale x 16 x i8> %a, %b + %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b + ret <vscale x 16 x i8> %max +} + +define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: @umax_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp ugt <vscale x 8 x i16> %a, %b + %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b + ret <vscale x 8 x i16> %max +} + +define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) 
{ +; CHECK-LABEL: umax_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp ugt <vscale x 4 x i32> %a, %b + %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b + ret <vscale x 4 x i32> %max +} + +define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: umax_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp ugt <vscale x 2 x i64> %a, %b + %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b + ret <vscale x 2 x i64> %max +} + +define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) { +; CHECK-LABEL: umax_split_i16: +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: umax z0.h, p0/m, z0.h, z2.h +; CHECK-DAG: umax z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: ret + %cmp = icmp ugt <vscale x 16 x i16> %a, %b + %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b + ret <vscale x 16 x i16> %max +} + +define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) { +; CHECK-LABEL: @umax_promote_i32 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp ugt <vscale x 2 x i32> %a, %b + %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b + ret <vscale x 2 x i32> %max +} + +; +; ASR +; + +define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){ +; CHECK-LABEL: @asr_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shr = ashr <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %shr +} + +define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){ +; CHECK-LABEL: @asr_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = ashr 
<vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %shr +} + +define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){ +; CHECK-LABEL: @asr_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shr = ashr <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %shr +} + +define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ +; CHECK-LABEL: @asr_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = ashr <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %shr +} + +define <vscale x 16 x i16> @asr_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b){ +; CHECK-LABEL: @asr_split_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: asr z0.h, p0/m, z0.h, z2.h +; CHECK-DAG: asr z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: ret + %shr = ashr <vscale x 16 x i16> %a, %b + ret <vscale x 16 x i16> %shr +} + +define <vscale x 2 x i32> @asr_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b){ +; CHECK-LABEL: @asr_promote_i32 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: and z1.d, z1.d, #0xffffffff +; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = ashr <vscale x 2 x i32> %a, %b + ret <vscale x 2 x i32> %shr +} + +; +; LSL +; + +define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){ +; CHECK-LABEL: @lsl_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shl = shl <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %shl +} + +define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){ +; CHECK-LABEL: @lsl_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shl = shl <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %shl +} + +define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){ +; CHECK-LABEL: @lsl_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shl = 
shl <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %shl +} + +define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ +; CHECK-LABEL: @lsl_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shl = shl <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %shl +} + +define <vscale x 4 x i64> @lsl_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){ +; CHECK-LABEL: @lsl_split_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: lsl z0.d, p0/m, z0.d, z2.d +; CHECK-DAG: lsl z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %shl = shl <vscale x 4 x i64> %a, %b + ret <vscale x 4 x i64> %shl +} + +define <vscale x 4 x i16> @lsl_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b){ +; CHECK-LABEL: @lsl_promote_i16 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: and z1.s, z1.s, #0xffff +; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shl = shl <vscale x 4 x i16> %a, %b + ret <vscale x 4 x i16> %shl +} + +; +; LSR +; + +define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){ +; CHECK-LABEL: @lsr_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shr = lshr <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %shr +} + +define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){ +; CHECK-LABEL: @lsr_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = lshr <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %shr +} + +define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){ +; CHECK-LABEL: @lsr_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shr = lshr <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %shr +} + +define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ +; CHECK-LABEL: @lsr_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = lshr 
<vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %shr +} + +define <vscale x 8 x i8> @lsr_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b){ +; CHECK-LABEL: @lsr_promote_i8 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: and z1.h, z1.h, #0xff +; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = lshr <vscale x 8 x i8> %a, %b + ret <vscale x 8 x i8> %shr +} + +define <vscale x 8 x i32> @lsr_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b){ +; CHECK-LABEL: @lsr_split_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: lsr z0.s, p0/m, z0.s, z2.s +; CHECK-DAG: lsr z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %shr = lshr <vscale x 8 x i32> %a, %b + ret <vscale x 8 x i32> %shr +} Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll =================================================================== --- llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ /dev/null @@ -1,608 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; -; SDIV -; - -define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: @sdiv_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %div = sdiv <vscale x 4 x i32> %a, %b - ret <vscale x 4 x i32> %div -} - -define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: @sdiv_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %div = sdiv <vscale x 2 x i64> %a, %b - ret <vscale x 2 x i64> %div -} - -define <vscale x 8 x i32> @sdiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) { -; CHECK-LABEL: @sdiv_split_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret - %div = sdiv <vscale x 8 x i32> %a, %b - ret <vscale x 8 x i32> %div -} - -define <vscale x 2 x i32> @sdiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) { -; CHECK-LABEL: @sdiv_promote_i32 -; 
CHECK-DAG: ptrue p0.d -; CHECK-DAG: sxtw z1.d, p0/m, z1.d -; CHECK-DAG: sxtw z0.d, p0/m, z0.d -; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %div = sdiv <vscale x 2 x i32> %a, %b - ret <vscale x 2 x i32> %div -} - -define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) { -; CHECK-LABEL: @sdiv_split_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret - %div = sdiv <vscale x 4 x i64> %a, %b - ret <vscale x 4 x i64> %div -} - -; -; UDIV -; - -define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: @udiv_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %div = udiv <vscale x 4 x i32> %a, %b - ret <vscale x 4 x i32> %div -} - -define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: @udiv_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %div = udiv <vscale x 2 x i64> %a, %b - ret <vscale x 2 x i64> %div -} - -define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) { -; CHECK-LABEL: @udiv_split_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret - %div = udiv <vscale x 8 x i32> %a, %b - ret <vscale x 8 x i32> %div -} - -define <vscale x 2 x i32> @udiv_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) { -; CHECK-LABEL: @udiv_promote_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: and z1.d, z1.d, #0xffffffff -; CHECK-DAG: and z0.d, z0.d, #0xffffffff -; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %div = udiv <vscale x 2 x i32> %a, %b - ret <vscale x 2 x i32> %div -} - -define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) { -; CHECK-LABEL: @udiv_split_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: udiv z0.d, p0/m, z0.d, 
z2.d -; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret - %div = udiv <vscale x 4 x i64> %a, %b - ret <vscale x 4 x i64> %div -} - -; -; SMIN -; - -define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { -; CHECK-LABEL: @smin_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: smin z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 16 x i8> %a, %b - %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b - ret <vscale x 16 x i8> %min -} - -define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { -; CHECK-LABEL: @smin_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 8 x i16> %a, %b - %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b - ret <vscale x 8 x i16> %min -} - -define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { -; CHECK-LABEL: smin_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 4 x i32> %a, %b - %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b - ret <vscale x 4 x i32> %min -} - -define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { -; CHECK-LABEL: smin_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 2 x i64> %a, %b - %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b - ret <vscale x 2 x i64> %min -} - -define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) { -; CHECK-LABEL: @smin_split_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: smin z0.b, p0/m, z0.b, z2.b -; CHECK-DAG: smin z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 32 x 
i8> %a, %b - %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b - ret <vscale x 32 x i8> %min -} - -define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) { -; CHECK-LABEL: smin_split_i16: -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: smin z0.h, p0/m, z0.h, z4.h -; CHECK-DAG: smin z1.h, p0/m, z1.h, z5.h -; CHECK-DAG: smin z2.h, p0/m, z2.h, z6.h -; CHECK-DAG: smin z3.h, p0/m, z3.h, z7.h -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 32 x i16> %a, %b - %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b - ret <vscale x 32 x i16> %min -} - -define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) { -; CHECK-LABEL: smin_split_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smin z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: smin z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 8 x i32> %a, %b - %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b - ret <vscale x 8 x i32> %min -} - -define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) { -; CHECK-LABEL: smin_split_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: smin z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: smin z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 4 x i64> %a, %b - %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b - ret <vscale x 4 x i64> %min -} - -define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) { -; CHECK-LABEL: @smin_promote_i8 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: sxtb z1.h, p0/m, z1.h -; CHECK-DAG: sxtb z0.h, p0/m, z0.h -; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 8 x i8> %a, %b - %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b - ret <vscale x 8 x i8> %min -} 
- -define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) { -; CHECK-LABEL: @smin_promote_i16 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: sxth z1.s, p0/m, z1.s -; CHECK-DAG: sxth z0.s, p0/m, z0.s -; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 4 x i16> %a, %b - %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b - ret <vscale x 4 x i16> %min -} - -define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) { -; CHECK-LABEL: @smin_promote_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: sxtw z1.d, p0/m, z1.d -; CHECK-DAG: sxtw z0.d, p0/m, z0.d -; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %cmp = icmp slt <vscale x 2 x i32> %a, %b - %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b - ret <vscale x 2 x i32> %min -} - -; -; UMIN -; - -define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { -; CHECK-LABEL: @umin_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: umin z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %cmp = icmp ult <vscale x 16 x i8> %a, %b - %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b - ret <vscale x 16 x i8> %min -} - -define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { -; CHECK-LABEL: @umin_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %cmp = icmp ult <vscale x 8 x i16> %a, %b - %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b - ret <vscale x 8 x i16> %min -} - -define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { -; CHECK-LABEL: umin_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: umin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %cmp = icmp ult <vscale x 4 x i32> %a, %b - %min 
= select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b - ret <vscale x 4 x i32> %min -} - -define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { -; CHECK-LABEL: umin_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: umin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %cmp = icmp ult <vscale x 2 x i64> %a, %b - %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b - ret <vscale x 2 x i64> %min -} - -define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) { -; CHECK-LABEL: umin_split_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: umin z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: umin z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret - %cmp = icmp ult <vscale x 4 x i64> %a, %b - %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b - ret <vscale x 4 x i64> %min -} - -define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) { -; CHECK-LABEL: @umin_promote_i8 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %cmp = icmp ult <vscale x 8 x i8> %a, %b - %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b - ret <vscale x 8 x i8> %min -} - -; -; SMAX -; - -define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { -; CHECK-LABEL: @smax_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %cmp = icmp sgt <vscale x 16 x i8> %a, %b - %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b - ret <vscale x 16 x i8> %max -} - -define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { -; CHECK-LABEL: @smax_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %cmp = icmp sgt <vscale x 8 x i16> %a, %b - %max = 
select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b - ret <vscale x 8 x i16> %max -} - -define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { -; CHECK-LABEL: smax_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %cmp = icmp sgt <vscale x 4 x i32> %a, %b - %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b - ret <vscale x 4 x i32> %max -} - -define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { -; CHECK-LABEL: smax_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %cmp = icmp sgt <vscale x 2 x i64> %a, %b - %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b - ret <vscale x 2 x i64> %max -} - -define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) { -; CHECK-LABEL: smax_split_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smax z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: smax z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret - %cmp = icmp sgt <vscale x 8 x i32> %a, %b - %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b - ret <vscale x 8 x i32> %max -} - -define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) { -; CHECK-LABEL: @smax_promote_i16 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %cmp = icmp sgt <vscale x 4 x i16> %a, %b - %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b - ret <vscale x 4 x i16> %max -} - -; -; UMAX -; - -define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { -; CHECK-LABEL: @umax_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %cmp = icmp ugt <vscale x 16 x i8> %a, %b - 
%max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b - ret <vscale x 16 x i8> %max -} - -define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { -; CHECK-LABEL: @umax_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %cmp = icmp ugt <vscale x 8 x i16> %a, %b - %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b - ret <vscale x 8 x i16> %max -} - -define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { -; CHECK-LABEL: umax_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %cmp = icmp ugt <vscale x 4 x i32> %a, %b - %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b - ret <vscale x 4 x i32> %max -} - -define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { -; CHECK-LABEL: umax_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %cmp = icmp ugt <vscale x 2 x i64> %a, %b - %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b - ret <vscale x 2 x i64> %max -} - -define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) { -; CHECK-LABEL: umax_split_i16: -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: umax z0.h, p0/m, z0.h, z2.h -; CHECK-DAG: umax z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: ret - %cmp = icmp ugt <vscale x 16 x i16> %a, %b - %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b - ret <vscale x 16 x i16> %max -} - -define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) { -; CHECK-LABEL: @umax_promote_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %cmp = icmp ugt <vscale x 2 x i32> %a, %b - 
%max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b - ret <vscale x 2 x i32> %max -} - -; -; ASR -; - -define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){ -; CHECK-LABEL: @asr_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: asr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %shr = ashr <vscale x 16 x i8> %a, %b - ret <vscale x 16 x i8> %shr -} - -define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){ -; CHECK-LABEL: @asr_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: asr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %shr = ashr <vscale x 8 x i16> %a, %b - ret <vscale x 8 x i16> %shr -} - -define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){ -; CHECK-LABEL: @asr_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: asr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %shr = ashr <vscale x 4 x i32> %a, %b - ret <vscale x 4 x i32> %shr -} - -define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ -; CHECK-LABEL: @asr_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %shr = ashr <vscale x 2 x i64> %a, %b - ret <vscale x 2 x i64> %shr -} - -define <vscale x 16 x i16> @asr_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b){ -; CHECK-LABEL: @asr_split_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: asr z0.h, p0/m, z0.h, z2.h -; CHECK-DAG: asr z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: ret - %shr = ashr <vscale x 16 x i16> %a, %b - ret <vscale x 16 x i16> %shr -} - -define <vscale x 2 x i32> @asr_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b){ -; CHECK-LABEL: @asr_promote_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: and z1.d, z1.d, #0xffffffff -; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %shr = ashr <vscale x 2 x i32> %a, %b - ret <vscale x 2 x i32> %shr -} - -; -; LSL -; - -define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){ -; CHECK-LABEL: @lsl_i8 -; CHECK-DAG: ptrue 
p0.b -; CHECK-DAG: lsl z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %shl = shl <vscale x 16 x i8> %a, %b - ret <vscale x 16 x i8> %shl -} - -define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){ -; CHECK-LABEL: @lsl_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %shl = shl <vscale x 8 x i16> %a, %b - ret <vscale x 8 x i16> %shl -} - -define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){ -; CHECK-LABEL: @lsl_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %shl = shl <vscale x 4 x i32> %a, %b - ret <vscale x 4 x i32> %shl -} - -define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ -; CHECK-LABEL: @lsl_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %shl = shl <vscale x 2 x i64> %a, %b - ret <vscale x 2 x i64> %shl -} - -define <vscale x 4 x i64> @lsl_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){ -; CHECK-LABEL: @lsl_split_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: lsl z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: lsl z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret - %shl = shl <vscale x 4 x i64> %a, %b - ret <vscale x 4 x i64> %shl -} - -define <vscale x 4 x i16> @lsl_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b){ -; CHECK-LABEL: @lsl_promote_i16 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: and z1.s, z1.s, #0xffff -; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %shl = shl <vscale x 4 x i16> %a, %b - ret <vscale x 4 x i16> %shl -} - -; -; LSR -; - -define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){ -; CHECK-LABEL: @lsr_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: lsr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret - %shr = lshr <vscale x 16 x i8> %a, %b - ret <vscale x 16 x i8> %shr -} - -define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){ -; CHECK-LABEL: @lsr_i16 -; CHECK-DAG: ptrue p0.h -; 
CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %shr = lshr <vscale x 8 x i16> %a, %b - ret <vscale x 8 x i16> %shr -} - -define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){ -; CHECK-LABEL: @lsr_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: lsr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %shr = lshr <vscale x 4 x i32> %a, %b - ret <vscale x 4 x i32> %shr -} - -define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ -; CHECK-LABEL: @lsr_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: lsr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %shr = lshr <vscale x 2 x i64> %a, %b - ret <vscale x 2 x i64> %shr -} - -define <vscale x 8 x i8> @lsr_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b){ -; CHECK-LABEL: @lsr_promote_i8 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: and z1.h, z1.h, #0xff -; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %shr = lshr <vscale x 8 x i8> %a, %b - ret <vscale x 8 x i8> %shr -} - -define <vscale x 8 x i32> @lsr_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b){ -; CHECK-LABEL: @lsr_split_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: lsr z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: lsr z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret - %shr = lshr <vscale x 8 x i32> %a, %b - ret <vscale x 8 x i32> %shr -} Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -879,6 +879,10 @@ SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; + void ReplaceExtensionResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG, unsigned HiOpcode, + unsigned LoOpcode) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const override; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- 
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -890,6 +890,14 @@ setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + } else { + if (VT.getVectorElementType() != MVT::i1) { + // Use UNPK{LO,HI} sequences to lower extensions from legal SVE + // types to wider-than-legal types. + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } } } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); @@ -3290,6 +3298,15 @@ default: llvm_unreachable("unimplemented operand"); return SDValue(); + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + // Needed because we have selected custom lowering for illegal SVE types. + // The cases we actually want to handle are where the operand is legal + // and the result isn't, which go through ReplaceNodeResults instead. + // This code only sees cases where the result is legal and the operand + // isn't. + return SDValue(); case ISD::BITCAST: return LowerBITCAST(Op, DAG); case ISD::GlobalAddress: @@ -13609,6 +13626,52 @@ Results.push_back(SplitVal); } +// If the node is an extension from a legal SVE type to something wider, +// use HiOpcode and LoOpcode to extend each half individually, then +// concatenate them together. 
+void AArch64TargetLowering::ReplaceExtensionResults( + SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, + unsigned HiOpcode, unsigned LoOpcode) const { + SDLoc DL(N); + SDValue In = N->getOperand(0); + EVT InVT = In.getValueType(); + + assert(InVT.isScalableVector() && "Can only lower scalable vectors"); + + if (!isTypeLegal(InVT)) + return; + + EVT InEltVT = InVT.getVectorElementType(); + auto EltCnt = InVT.getVectorElementCount(); + unsigned InEltBits = InEltVT.getSizeInBits(); + + if (InEltBits != 8 && InEltBits != 16 && InEltBits != 32) + return; + + // The result must be at least twice as wide as the input in order for + // this to work. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + if (EltVT.getSizeInBits() < InEltBits * 2) + return; + + // Extend In to a double-width vector. + EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), InEltBits * 2); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, EltCnt / 2); + assert(isTypeLegal(NewVT) && "Extension result should be legal"); + + SDValue Lo = DAG.getNode(LoOpcode, DL, NewVT, In); + SDValue Hi = DAG.getNode(HiOpcode, DL, NewVT, In); + + // If necessary, extend again using the original code. Such extensions + // will also need legalizing, but at least we're making forward progress. 
+ NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltCnt / 2); + Lo = DAG.getNode(N->getOpcode(), DL, NewVT, Lo); + Hi = DAG.getNode(N->getOpcode(), DL, NewVT, Hi); + + Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi)); +} + static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) { SDLoc DL(N); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N); @@ -13749,6 +13812,15 @@ case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget); return; + case ISD::SIGN_EXTEND: + ReplaceExtensionResults(N, Results, DAG, + AArch64ISD::SUNPKHI, AArch64ISD::SUNPKLO); + return; + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + ReplaceExtensionResults(N, Results, DAG, + AArch64ISD::UUNPKHI, AArch64ISD::UUNPKLO); + return; case ISD::LOAD: { assert(SDValue(N, 0).getValueType() == MVT::i128 && "unexpected load's value type");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits