kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, huntergr, dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.
Adds intrinsics for the following:
  - ftmad, ftssel
  - fcadd, fcmla (including the indexed fcmla_lane form)
  - fmla, fmls, fnmla, fnmls (including the indexed fmla_lane and fmls_lane forms)
  - fmad, fmsb, fnmad, fnmsb
  - fmul_lane (indexed fmul)

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69707

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
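
For reference, the new intrinsics are exercised directly from IR by the tests below; a minimal standalone sketch of the predicated fcadd form is shown here (the function name @fcadd_example is illustrative, and the signature and rotation immediate are taken from the new test file):

  ; Sketch assuming an SVE-enabled AArch64 target; the last operand is the
  ; rotation and must be an immediate of 90 or 270, as in the FCADD tests.
  define <vscale x 4 x float> @fcadd_example(<vscale x 4 x i1> %pg,
                                             <vscale x 4 x float> %a,
                                             <vscale x 4 x float> %b) {
    %res = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b,
                                                                     i32 90)
    ret <vscale x 4 x float> %res
  }

  declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)

With the patterns added in SVEInstrFormats.td, this is expected to select to a single predicated "fcadd z0.s, p0/m, z0.s, z1.s, #90" when compiled through llc for an SVE-enabled AArch64 target, matching the fcadd_s check below.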
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll @@ -69,6 +69,111 @@ } ; +; FCADD +; + +define <vscale x 8 x half> @fcadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: fcadd_h: +; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90 +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + i32 90) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fcadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: fcadd_s: +; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270 +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + i32 270) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fcadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: fcadd_d: +; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90 +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + i32 90) + ret <vscale x 2 x double> %out +} + +; +; FCMLA +; + +define <vscale x 8 x half> @fcmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fcmla_h: +; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90 +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c, + i32 90) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fcmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fcmla_s: +; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180 +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c, + i32 180) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fcmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fcmla_d: +; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270 +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c, + i32 270) + ret <vscale x 2 x double> %out +} + +; +; FCMLA (Indexed) +; + +define <vscale x 8 x half> @fcmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fcmla_lane_h: +; CHECK: fcmla z0.h, z1.h, z2.h[3], #0 +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c, + i32 3, + i32 0) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fcmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fcmla_lane_s: +; CHECK: fcmla z0.s, z1.s, z2.s[1], #90 +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %a, + 
<vscale x 4 x float> %b, + <vscale x 4 x float> %c, + i32 1, + i32 90) + ret <vscale x 4 x float> %out +} + +; ; FDIV ; @@ -137,6 +242,43 @@ } ; +; FMAD +; + +define <vscale x 8 x half> @fmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fmad_h: +; CHECK: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fmad_s: +; CHECK: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fmad_d: +; CHECK: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; ; FMAX ; @@ -273,6 +415,191 @@ } ; +; FMLA +; + +define <vscale x 8 x half> @fmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fmla_h: +; CHECK: fmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fmla_s: +; CHECK: fmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fmla_d: +; CHECK: fmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; +; FMLA (Indexed) +; + +define <vscale x 8 x half> @fmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fmla_lane_h: +; CHECK: fmla z0.h, z1.h, z2.h[3] +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c, + i32 3) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fmla_lane_s: +; CHECK: fmla z0.s, z1.s, z2.s[2] +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c, + i32 2) + ret <vscale x 4 x 
float> %out +} + +define <vscale x 2 x double> @fmla_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fmla_lane_d: +; CHECK: fmla z0.d, z1.d, z2.d[1] +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c, + i32 1) + ret <vscale x 2 x double> %out +} + +; +; FMLS +; + +define <vscale x 8 x half> @fmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fmls_h: +; CHECK: fmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fmls_s: +; CHECK: fmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fmls_d: +; CHECK: fmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; +; FMLS (Indexed) +; + +define <vscale x 8 x half> @fmls_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fmls_lane_h: +; CHECK: fmls z0.h, z1.h, z2.h[3] +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c, + i32 3) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmls_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fmls_lane_s: +; CHECK: fmls z0.s, z1.s, z2.s[2] +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c, + i32 2) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fmls_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fmls_lane_d: +; CHECK: fmls z0.d, z1.d, z2.d[1] +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c, + i32 1) + ret <vscale x 2 x double> %out +} + +; +; FMSB +; + +define <vscale x 8 x half> @fmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fmsb_h: +; CHECK: fmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fmsb_s: +; CHECK: fmsb z0.s, p0/m, 
z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fmsb_d: +; CHECK: fmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; ; FMUL ; @@ -307,6 +634,40 @@ } ; +; FMUL (Indexed) +; + +define <vscale x 8 x half> @fmul_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: fmul_lane_h: +; CHECK: fmul z0.h, z0.h, z1.h[3] +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half> %a, + <vscale x 8 x half> %b, + i32 3) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fmul_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: fmul_lane_s: +; CHECK: fmul z0.s, z0.s, z1.s[2] +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float> %a, + <vscale x 4 x float> %b, + i32 2) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fmul_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: fmul_lane_d: +; CHECK: fmul z0.d, z0.d, z1.d[1] +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double> %a, + <vscale x 2 x double> %b, + i32 1) + ret <vscale x 2 x double> %out +} + +; ; FMULX ; @@ -375,6 +736,154 @@ } ; +; FNMAD +; + +define <vscale x 8 x half> @fnmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fnmad_h: +; CHECK: fnmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fnmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fnmad_s: +; CHECK: fnmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fnmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fnmad_d: +; CHECK: fnmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; +; FNMLA +; + +define <vscale x 8 x half> @fnmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fnmla_h: +; CHECK: fnmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define 
<vscale x 4 x float> @fnmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fnmla_s: +; CHECK: fnmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fnmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fnmla_d: +; CHECK: fnmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; +; FNMLS +; + +define <vscale x 8 x half> @fnmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fnmls_h: +; CHECK: fnmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fnmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fnmls_s: +; CHECK: fnmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fnmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fnmls_d: +; CHECK: fnmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; +; FNMSB +; + +define <vscale x 8 x half> @fnmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) { +; CHECK-LABEL: fnmsb_h: +; CHECK: fnmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a, + <vscale x 8 x half> %b, + <vscale x 8 x half> %c) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @fnmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) { +; CHECK-LABEL: fnmsb_s: +; CHECK: fnmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a, + <vscale x 4 x float> %b, + <vscale x 4 x float> %c) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) { +; CHECK-LABEL: fnmsb_d: +; CHECK: fnmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a, + <vscale x 2 x double> %b, + <vscale x 2 x double> %c) + ret <vscale x 2 x double> %out +} + +; ; FSUB ; @@ -443,6 +952,40 @@ } ; +; FTMAD +; + +define <vscale x 8 x half> 
@ftmad_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: ftmad_h: +; CHECK: ftmad z0.h, z0.h, z1.h, #0 +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half> %a, + <vscale x 8 x half> %b, + i32 0) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @ftmad_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: ftmad_s: +; CHECK: ftmad z0.s, z0.s, z1.s, #0 +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float> %a, + <vscale x 4 x float> %b, + i32 0) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @ftmad_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: ftmad_d: +; CHECK: ftmad z0.d, z0.d, z1.d, #7 +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double> %a, + <vscale x 2 x double> %b, + i32 7) + ret <vscale x 2 x double> %out +} + +; ; FTSMUL ; @@ -473,6 +1016,37 @@ ret <vscale x 2 x double> %out } +; +; FTSSEL +; + +define <vscale x 8 x half> @ftssel_h(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: ftssel_h: +; CHECK: ftssel z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @ftssel_s(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: ftssel_s: +; CHECK: ftssel z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @ftssel_d(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: ftssel_d: +; CHECK: ftssel z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x double> %out +} + declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) @@ -481,6 +1055,17 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32) +declare <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32) +declare <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32) + +declare <vscale 
x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32, i32) + declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) @@ -489,6 +1074,10 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) @@ -505,14 +1094,54 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, 
i32) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32) +declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32) + declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + declare <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, 
<vscale x 8 x i16>) declare <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x i32>) declare <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x i64>) @@ -525,6 +1154,14 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32) +declare <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32) +declare <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32) + declare <vscale x 8 x half> @llvm.aarch64.sve.ftsmul.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>) declare <vscale x 4 x float> @llvm.aarch64.sve.ftsmul.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>) declare <vscale x 2 x double> @llvm.aarch64.sve.ftsmul.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>) +declare <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>) +declare <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>) Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -298,6 +298,12 @@ : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), (inst $Op1, $Op2, $Op3)>; +class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, + ValueType vt2, ValueType vt3, ValueType vt4, + Instruction inst> +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)), + (inst $Op1, $Op2, $Op3, $Op4)>; + def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>; //===----------------------------------------------------------------------===// @@ -1220,7 +1226,7 @@ } class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3), +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3), asm, "\t$Zdn, $_Zdn, $Zm, $imm3", "", []>, Sched<[]> { @@ -1240,10 +1246,17 @@ let ElementSize = ElementSizeNone; } -multiclass sve_fp_ftmad<string asm> { +multiclass sve_fp_ftmad<string asm, SDPatternOperator op> { def _H : sve_fp_ftmad<0b01, asm, ZPR16>; def _S : sve_fp_ftmad<0b10, asm, ZPR32>; def _D : sve_fp_ftmad<0b11, asm, ZPR64>; + + def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))), + (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>; + def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))), + (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>; + def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))), + (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>; } @@ -1318,10 +1331,14 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> { +multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, SDPatternOperator op> { def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>; def _S : 
sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>; def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>; + + def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm, @@ -1349,10 +1366,14 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> { +multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op> { def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>; def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>; def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>; + + def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -1379,26 +1400,34 @@ let ElementSize = ElementSizeNone; } -multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> { - def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> { +multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm, + SDPatternOperator op> { + def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; bits<3> iop; let Inst{22} = iop{2}; let Inst{20-19} = iop{1-0}; let Inst{18-16} = Zm; } - def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> { + def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS32b> { bits<3> Zm; bits<2> iop; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> { + def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD32b> { bits<4> Zm; bit iop; let Inst{20} = iop; let Inst{19-16} = Zm; } + + def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))), + (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>; + def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))), + (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>; + def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))), + (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>; } @@ -1420,26 +1449,33 @@ let Inst{4-0} = Zd; } -multiclass sve_fp_fmul_by_indexed_elem<string asm> { - def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> { +multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> { + def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; bits<3> iop; let Inst{22} = iop{2}; let Inst{20-19} = iop{1-0}; let Inst{18-16} = Zm; } - def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> { + def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> { bits<3> Zm; bits<2> iop; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> { + def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> 
{ bits<4> Zm; bit iop; let Inst{20} = iop; let Inst{19-16} = Zm; } + + def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))), + (!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>; + def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))), + (!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>; + def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))), + (!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>; } //===----------------------------------------------------------------------===// @@ -1471,10 +1507,17 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_fp_fcmla<string asm> { +multiclass sve_fp_fcmla<string asm, SDPatternOperator op> { def _H : sve_fp_fcmla<0b01, asm, ZPR16>; def _S : sve_fp_fcmla<0b10, asm, ZPR32>; def _D : sve_fp_fcmla<0b11, asm, ZPR64>; + + def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, nxv8f16:$Op4, (i32 complexrotateop:$imm))), + (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>; + def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, nxv4f32:$Op4, (i32 complexrotateop:$imm))), + (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>; + def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, nxv2f64:$Op4, (i32 complexrotateop:$imm))), + (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>; } //===----------------------------------------------------------------------===// @@ -1504,19 +1547,24 @@ let ElementSize = ElementSizeNone; } -multiclass sve_fp_fcmla_by_indexed_elem<string asm> { - def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> { +multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> { + def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS32b> { bits<3> Zm; bits<2> iop; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> { + def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD32b> { bits<4> Zm; bits<1> iop; let Inst{20} = iop; let Inst{19-16} = Zm; } + + def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))), + (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>; + def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))), + (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>; } //===----------------------------------------------------------------------===// @@ -1547,10 +1595,17 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_fp_fcadd<string asm> { +multiclass sve_fp_fcadd<string asm, SDPatternOperator op> { def _H : sve_fp_fcadd<0b01, asm, ZPR16>; def _S : sve_fp_fcadd<0b10, asm, ZPR32>; def _D : sve_fp_fcadd<0b11, asm, ZPR64>; + + def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 complexrotateopodd:$imm))), + (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; + def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 complexrotateopodd:$imm))), + (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; + def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 complexrotateopodd:$imm))), + (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, 
complexrotateopodd:$imm)>; } //===----------------------------------------------------------------------===// @@ -5631,10 +5686,14 @@ let Inst{4-0} = Zd; } -multiclass sve_int_bin_cons_misc_0_b<string asm> { +multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> { def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>; def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>; def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>; + + def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>; } class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty> Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -145,28 +145,28 @@ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>; defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>; - defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">; + defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>; - defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">; - defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">; + defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>; + defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>; - defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">; - defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">; - defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">; - defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">; + defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", int_aarch64_sve_fmla>; + defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", int_aarch64_sve_fmls>; + defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", int_aarch64_sve_fnmla>; + defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", int_aarch64_sve_fnmls>; - defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">; - defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">; - defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">; - defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">; + defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad>; + defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb>; + defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>; + defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>; - defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">; + defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>; - defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">; - defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">; + defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>; + defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>; - defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">; - defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">; + defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; + defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; // SVE floating point reductions. 
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">; Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -758,6 +758,13 @@ let ParserMatchClass = Imm0_7Operand; } +// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7] +def imm32_0_7 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint32_t)Imm) < 8; +}]> { + let ParserMatchClass = Imm0_7Operand; +} + // imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15] def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{ return ((uint32_t)Imm) < 16; @@ -10052,15 +10059,20 @@ let DiagnosticType = "InvalidComplexRotation" # Type; let Name = "ComplexRotation" # Type; } -def complexrotateop : Operand<i32> { +def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }], + SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i64); +}]>> { let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">; let PrintMethod = "printComplexRotationOp<90, 0>"; } -def complexrotateopodd : Operand<i32> { +def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }], + SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i64); +}]>> { let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; let PrintMethod = "printComplexRotationOp<180, 90>"; } - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode, RegisterOperand regtype, Operand rottype, Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -771,6 +771,29 @@ LLVMMatchType<0>], [IntrNoMem]>; + class AdvSIMD_Pred3VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem]>; + + class AdvSIMD_2VectorArgIndexed_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + class AdvSIMD_3VectorArgIndexed_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_CNT_Intrinsic : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], [LLVMVectorOfBitcastsToInt<0>, @@ -783,6 +806,32 @@ [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; + class AdvSIMD_SVE_CADD_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_CMLA_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_CMLA_LANE_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty, + llvm_i32_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], @@ -896,18 +945,34 @@ def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fcadd : 
AdvSIMD_SVE_CADD_Intrinsic; +def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic; +def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic; def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic; +def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic; +def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic; def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic; def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic; +def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic; // // Floating-point comparisons