================
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+; Each function below calls one llvm.aarch64.sme.*tmopa intrinsic and checks
+; the instruction sequence llc prints for it.
+;
+; Every expected sequence has the same shape: z3 is copied into z28, and the
+; final instruction takes { z0, z1 } as a register pair, z2, and z28[0].
+; NOTE(review): z3 is presumably where %zk arrives, and the leading/trailing
+; `i32 0` arguments presumably select tile za0 and index [0] - confirm.
+; Attribute group #0 is defined outside the portion of the file shown here.
+
+; i8 inputs via the stmopa intrinsic: expects `stmopa` on za0.s with .b operands.
+define void @tmopa_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: stmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; i8 inputs via the utmopa intrinsic: expects `utmopa` on za0.s with .b operands.
+define void @tmopa_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: utmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; i8 inputs via the ustmopa intrinsic: expects `ustmopa` on za0.s with .b operands.
+define void @tmopa_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_u8_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: ustmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; i8 inputs via the sutmopa intrinsic: expects `sutmopa` on za0.s with .b operands.
+define void @tmopa_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_s8_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: sutmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; i16 inputs via the stmopa intrinsic: expects `stmopa` on za0.s with .h operands.
+define void @tmopa_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: stmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; i16 inputs via the utmopa intrinsic: expects `utmopa` on za0.s with .h operands.
+define void @tmopa_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: utmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; half inputs via the tmopa intrinsic: expects `ftmopa` on za0.s with .h operands.
+define void @tmopa_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; bfloat inputs via the tmopa intrinsic: expects `bftmopa` on za0.s with .h operands.
+define void @tmopa_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; float inputs via the tmopa intrinsic: expects `ftmopa` on za0.s with .s operands.
+define void @tmopa_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za32_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; half inputs via the tmopa intrinsic, named as a za16 test.
+; NOTE(review): the call and the expected output (`ftmopa za0.s`) are identical
+; to @tmopa_za32_f16 above, so this does not exercise anything za16-specific.
+; Confirm whether a distinct za16 intrinsic / tile was intended here.
+define void @tmopa_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za16_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
+; bfloat inputs via the tmopa intrinsic, named as a za16 test.
+; NOTE(review): the call and the expected output (`bftmopa za0.s`) are identical
+; to @tmopa_za32_bf16 above, so this does not exercise anything za16-specific.
+; Confirm whether a distinct za16 intrinsic / tile was intended here.
+define void @tmopa_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: tmopa_za16_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z28.d, z3.d
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk, i32 0)
+ ret void
+}
+
----------------
jthackray wrote:
Thanks, I've added new tests for `@llvm.aarch64.sme.za16.ftmopa.nxv16if8` and `@llvm.aarch64.sme.za32.ftmopa.nxv16if8`.

https://github.com/llvm/llvm-project/pull/135145
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits