tlively created this revision. tlively added reviewers: aheejin, dschuff. Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, jgravelle-google, sbc100. tlively requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits.
Replace the clang builtins and LLVM intrinsics for the SIMD extmul instructions with normal codegen patterns. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D106724 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c clang/test/Headers/wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-arith.ll llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -248,54 +248,6 @@ ret <8 x i16> %a } -; CHECK-LABEL: extmul_low_s_v8i16: -; CHECK-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8>, <16 x i8>) -define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %x, <16 x i8> %y) { - %a = call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16( - <16 x i8> %x, <16 x i8> %y - ) - ret <8 x i16> %a -} - -; CHECK-LABEL: extmul_high_s_v8i16: -; CHECK-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8>, <16 x i8>) -define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %x, <16 x i8> %y) { - %a = call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16( - <16 x i8> %x, <16 x i8> %y - ) - ret <8 x i16> %a -} - -; CHECK-LABEL: extmul_low_u_v8i16: -; CHECK-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8>, <16 x i8>) -define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %x, <16 x i8> %y) { - %a = call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16( - <16 x i8> %x, <16 x i8> %y - ) - ret <8 x i16> %a -} - -; CHECK-LABEL: extmul_high_u_v8i16: -; CHECK-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8>, <16 x i8>) -define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %x, <16 x i8> %y) { - %a = call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16( - <16 x i8> %x, <16 x i8> %y - ) - ret <8 x i16> %a -} - ; CHECK-LABEL: extadd_pairwise_s_v8i16: ; CHECK-NEXT: .functype extadd_pairwise_s_v8i16 (v128) -> (v128){{$}} ; CHECK-NEXT: i16x8.extadd_pairwise_i8x16_s $push[[R:[0-9]+]]=, $0{{$}} @@ -395,55 +347,6 @@ ret <4 x i32> %a } - -; CHECK-LABEL: extmul_low_s_v4i32: -; CHECK-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16>, <8 x i16>) -define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %x, <8 x i16> %y) { - %a = call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32( - <8 x i16> %x, <8 x i16> %y - ) - ret <4 x i32> %a -} - -; CHECK-LABEL: extmul_high_s_v4i32: -; CHECK-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16>, <8 x i16>) -define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %x, <8 x i16> %y) { - %a = call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32( - <8 x i16> %x, <8 x i16> %y - ) - ret <4 x i32> %a -} - -; CHECK-LABEL: extmul_low_u_v4i32: -; CHECK-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16>, <8 x i16>) -define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %x, <8 x i16> %y) { - %a = call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32( - <8 x i16> %x, <8 x i16> %y - ) - ret <4 x i32> %a -} - -; CHECK-LABEL: extmul_high_u_v4i32: -; CHECK-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16>, <8 x i16>) -define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %x, <8 x i16> %y) { - %a = call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32( - <8 x i16> %x, <8 x i16> %y - ) - ret <4 x i32> %a -} - ; CHECK-LABEL: extadd_pairwise_s_v4i32: ; CHECK-NEXT: .functype extadd_pairwise_s_v4i32 (v128) -> (v128){{$}} ; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_s $push[[R:[0-9]+]]=, $0{{$}} @@ -580,54 +483,6 @@ ; ============================================================================== ; 2 x i64 ; ============================================================================== -; CHECK-LABEL: extmul_low_s_v2i64: -; CHECK-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32>, <4 x i32>) -define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %x, <4 x i32> %y) { - %a = call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64( - <4 x i32> %x, <4 x i32> %y - ) - ret <2 x i64> %a -} - -; CHECK-LABEL: extmul_high_s_v2i64: -; CHECK-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32>, <4 x i32>) -define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %x, <4 x i32> %y) { - %a = call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64( - <4 x i32> %x, <4 x i32> %y - ) - ret <2 x i64> %a -} - -; CHECK-LABEL: extmul_low_u_v2i64: -; CHECK-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32>, <4 x i32>) -define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %x, <4 x i32> %y) { - %a = call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64( - <4 x i32> %x, <4 x i32> %y - ) - ret <2 x i64> %a -} - -; CHECK-LABEL: extmul_high_u_v2i64: -; CHECK-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32>, <4 x i32>) -define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %x, <4 x i32> %y) { - %a = call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64( - <4 x i32> %x, <4 x i32> %y - ) - ret <2 x i64> %a -} - ; CHECK-LABEL: any_v2i64: ; CHECK-NEXT: .functype any_v2i64 (v128) -> (i32){{$}} ; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} Index: llvm/test/CodeGen/WebAssembly/simd-arith.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -659,6 +659,70 @@ ret <8 x i16> %a } +; CHECK-LABEL: extmul_low_s_v8i16: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i16x8.extmul_low_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { + %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, + <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %low2 = shufflevector <16 x i8> %v2, <16 x i8> undef, + <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %extended1 = sext <8 x i8> %low1 to <8 x i16> + %extended2 = sext <8 x i8> %low2 to <8 x i16> + %a = mul <8 x i16> %extended1, %extended2 + ret <8 x i16> %a +} + +; CHECK-LABEL: extmul_high_s_v8i16: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { + %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, + <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %high2 = shufflevector <16 x i8> %v2, <16 x i8> undef, + <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %extended1 = sext <8 x i8> %high1 to <8 x i16> + %extended2 = sext <8 x i8> %high2 to <8 x i16> + %a = mul <8 x i16> %extended1, %extended2 + ret <8 x i16> %a +} + +; CHECK-LABEL: extmul_low_u_v8i16: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { + %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, + <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %low2 = shufflevector <16 x i8> %v2, <16 x i8> undef, + <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %extended1 = zext <8 x i8> %low1 to <8 x i16> + %extended2 = zext <8 x i8> %low2 to <8 x i16> + %a = mul <8 x i16> %extended1, %extended2 + ret <8 x i16> %a +} + +; CHECK-LABEL: extmul_high_u_v8i16: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { + %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, + <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %high2 = shufflevector <16 x i8> %v2, <16 x i8> undef, + <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %extended1 = zext <8 x i8> %high1 to <8 x i16> + %extended2 = zext <8 x i8> %high2 to <8 x i16> + %a = mul <8 x i16> %extended1, %extended2 + ret <8 x i16> %a +} + ; ============================================================================== ; 4 x i32 ; ============================================================================== @@ -934,6 +998,70 @@ ret <4 x i32> %a } +; CHECK-LABEL: extmul_low_s_v4i32: +; NO-SIMD128-NOT: i32x4 +; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { + %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, + <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %low2 = shufflevector <8 x i16> %v2, <8 x i16> undef, + <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %extended1 = sext <4 x i16> %low1 to <4 x i32> + %extended2 = sext <4 x i16> %low2 to <4 x i32> + %a = mul <4 x i32> %extended1, %extended2 + ret <4 x i32> %a +} + +; CHECK-LABEL: extmul_high_s_v4i32: +; NO-SIMD128-NOT: i32x4 +; SIMD128-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { + %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, + <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %high2 = shufflevector <8 x i16> %v2, <8 x i16> undef, + <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %extended1 = sext <4 x i16> %high1 to <4 x i32> + %extended2 = sext <4 x i16> %high2 to <4 x i32> + %a = mul <4 x i32> %extended1, %extended2 + ret <4 x i32> %a +} + +; CHECK-LABEL: extmul_low_u_v4i32: +; NO-SIMD128-NOT: i32x4 +; SIMD128-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { + %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, + <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %low2 = shufflevector <8 x i16> %v2, <8 x i16> undef, + <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %extended1 = zext <4 x i16> %low1 to <4 x i32> + %extended2 = zext <4 x i16> %low2 to <4 x i32> + %a = mul <4 x i32> %extended1, %extended2 + ret <4 x i32> %a +} + +; CHECK-LABEL: extmul_high_u_v4i32: +; NO-SIMD128-NOT: i32x4 +; SIMD128-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { + %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, + <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %high2 = shufflevector <8 x i16> %v2, <8 x i16> undef, + <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %extended1 = zext <4 x i16> %high1 to <4 x i32> + %extended2 = zext <4 x i16> %high2 to <4 x i32> + %a = mul <4 x i32> %extended1, %extended2 + ret <4 x i32> %a +} + ; ============================================================================== ; 2 x i64 ; ============================================================================== @@ -1262,6 +1390,62 @@ ret <2 x i64> %a } +; CHECK-LABEL: extmul_low_s_v2i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) { + %low1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %low2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %extended1 = sext <2 x i32> %low1 to <2 x i64> + %extended2 = sext <2 x i32> %low2 to <2 x i64> + %a = mul <2 x i64> %extended1, %extended2 + ret <2 x i64> %a +} + +; CHECK-LABEL: extmul_high_s_v2i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) { + %high1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %high2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %extended1 = sext <2 x i32> %high1 to <2 x i64> + %extended2 = sext <2 x i32> %high2 to <2 x i64> + %a = mul <2 x i64> %extended1, %extended2 + ret <2 x i64> %a +} + +; CHECK-LABEL: extmul_low_u_v2i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) { + %low1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %low2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %extended1 = zext <2 x i32> %low1 to <2 x i64> + %extended2 = zext <2 x i32> %low2 to <2 x i64> + %a = mul <2 x i64> %extended1, %extended2 + ret <2 x i64> %a +} + +; CHECK-LABEL: extmul_high_u_v2i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-SLOW-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) { + %high1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %high2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %extended1 = zext <2 x i32> %high1 to <2 x i64> + %extended2 = zext <2 x i32> %high2 to <2 x i64> + %a = mul <2 x i64> %extended1, %extended2 + ret <2 x i64> %a +} + ; ============================================================================== ; 4 x float ; ============================================================================== Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1028,7 +1028,14 @@ 186>; // Extending multiplication: extmul_{low,high}_P, extmul_high -multiclass SIMDExtBinary<Vec vec, Intrinsic node, string name, bits<32> simdop> { +def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>; +def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>; +def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>; +def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>; + +multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name, + bits<32> simdop> { defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set (vec.vt V128:$dst), (node @@ -1037,32 +1044,41 @@ vec.prefix#"."#name, simdop>; } +class ExtMulPat<SDNode extend> : + PatFrag<(ops node:$lhs, node:$rhs), + (mul (extend $lhs), (extend $rhs))> {} + +def extmul_low_s : ExtMulPat<extend_low_s>; +def extmul_high_s : ExtMulPat<extend_high_s>; +def extmul_low_u : ExtMulPat<extend_low_u>; +def extmul_high_u : ExtMulPat<extend_high_u>; + defm EXTMUL_LOW_S : - SIMDExtBinary<I16x8, int_wasm_extmul_low_signed, "extmul_low_i8x16_s", 0x9c>; + SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>; defm EXTMUL_HIGH_S : - SIMDExtBinary<I16x8, int_wasm_extmul_high_signed, "extmul_high_i8x16_s", 0x9d>; + SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>; defm EXTMUL_LOW_U : - SIMDExtBinary<I16x8, int_wasm_extmul_low_unsigned, "extmul_low_i8x16_u", 0x9e>; + SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>; defm EXTMUL_HIGH_U : - SIMDExtBinary<I16x8, int_wasm_extmul_high_unsigned, "extmul_high_i8x16_u", 0x9f>; + SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>; defm EXTMUL_LOW_S : - SIMDExtBinary<I32x4, int_wasm_extmul_low_signed, "extmul_low_i16x8_s", 0xbc>; + SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>; defm EXTMUL_HIGH_S : - SIMDExtBinary<I32x4, int_wasm_extmul_high_signed, "extmul_high_i16x8_s", 0xbd>; + SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>; defm EXTMUL_LOW_U : - SIMDExtBinary<I32x4, int_wasm_extmul_low_unsigned, "extmul_low_i16x8_u", 0xbe>; + SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>; defm EXTMUL_HIGH_U : - SIMDExtBinary<I32x4, int_wasm_extmul_high_unsigned, "extmul_high_i16x8_u", 0xbf>; + SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>; defm EXTMUL_LOW_S : - SIMDExtBinary<I64x2, int_wasm_extmul_low_signed, "extmul_low_i32x4_s", 0xdc>; + SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>; defm EXTMUL_HIGH_S : - SIMDExtBinary<I64x2, int_wasm_extmul_high_signed, "extmul_high_i32x4_s", 0xdd>; + SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>; defm EXTMUL_LOW_U : - SIMDExtBinary<I64x2, int_wasm_extmul_low_unsigned, "extmul_low_i32x4_u", 0xde>; + SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>; defm EXTMUL_HIGH_U : - SIMDExtBinary<I64x2, int_wasm_extmul_high_unsigned, "extmul_high_i32x4_u", 0xdf>; + SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>; //===----------------------------------------------------------------------===// // Floating-point unary arithmetic @@ -1191,12 +1207,6 @@ defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>; // Extending operations -def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; -def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>; -def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>; -def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>; -def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>; - // TODO: refactor this to be uniform for i64x2 if the numbering is not changed. multiclass SIMDExtend<Vec vec, bits<32> baseInst> { defm "" : SIMDConvert<vec, vec.split, extend_low_s, Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -162,23 +162,6 @@ [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extmul_low_signed : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extmul_high_signed : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extmul_low_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extmul_high_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; - def int_wasm_extadd_pairwise_signed : Intrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>], Index: clang/test/Headers/wasm.c =================================================================== --- clang/test/Headers/wasm.c +++ clang/test/Headers/wasm.c @@ -2781,10 +2781,14 @@ // CHECK-LABEL: @test_i16x8_extmul_low_i8x16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) { return wasm_i16x8_extmul_low_i8x16(a, b); @@ -2793,10 +2797,14 @@ // CHECK-LABEL: @test_i16x8_extmul_high_i8x16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) { return wasm_i16x8_extmul_high_i8x16(a, b); @@ -2805,10 +2813,14 @@ // CHECK-LABEL: @test_u16x8_extmul_low_u8x16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) { return wasm_u16x8_extmul_low_u8x16(a, b); @@ -2817,10 +2829,14 @@ // CHECK-LABEL: @test_u16x8_extmul_high_u8x16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) { return wasm_u16x8_extmul_high_u8x16(a, b); @@ -2829,9 +2845,13 @@ // CHECK-LABEL: @test_i32x4_extmul_low_i16x8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: ret <4 x i32> [[MUL_I]] // v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) { return wasm_i32x4_extmul_low_i16x8(a, b); @@ -2840,9 +2860,13 @@ // CHECK-LABEL: @test_i32x4_extmul_high_i16x8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: ret <4 x i32> [[MUL_I]] // v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) { return wasm_i32x4_extmul_high_i16x8(a, b); @@ -2851,9 +2875,13 @@ // CHECK-LABEL: @test_u32x4_extmul_low_u16x8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: ret <4 x i32> [[MUL_I]] // v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) { return wasm_u32x4_extmul_low_u16x8(a, b); @@ -2862,9 +2890,13 @@ // CHECK-LABEL: @test_u32x4_extmul_high_u16x8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]] -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: ret <4 x i32> [[MUL_I]] // v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) { return wasm_u32x4_extmul_high_u16x8(a, b); @@ -2872,9 +2904,13 @@ // CHECK-LABEL: @test_i64x2_extmul_low_i32x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) { return wasm_i64x2_extmul_low_i32x4(a, b); @@ -2882,9 +2918,13 @@ // CHECK-LABEL: @test_i64x2_extmul_high_i32x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) { return wasm_i64x2_extmul_high_i32x4(a, b); @@ -2892,9 +2932,13 @@ // CHECK-LABEL: @test_u64x2_extmul_low_u32x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) { return wasm_u64x2_extmul_low_u32x4(a, b); @@ -2902,9 +2946,13 @@ // CHECK-LABEL: @test_u64x2_extmul_high_u32x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) { return wasm_u64x2_extmul_high_u32x4(a, b); Index: clang/test/CodeGen/builtins-wasm.c =================================================================== --- clang/test/CodeGen/builtins-wasm.c +++ clang/test/CodeGen/builtins-wasm.c @@ -379,90 +379,6 @@ // WEBASSEMBLY-NEXT: ret } -i16x8 extmul_low_i8x16_s_i16x8(i8x16 x, i8x16 y) { - return __builtin_wasm_extmul_low_i8x16_s_i16x8(x, y); - // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16( - // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) - // WEBASSEMBLY-NEXT: ret -} - -i16x8 extmul_high_i8x16_s_i16x8(i8x16 x, i8x16 y) { - return __builtin_wasm_extmul_high_i8x16_s_i16x8(x, y); - // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16( - // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) - // WEBASSEMBLY-NEXT: ret -} - -u16x8 extmul_low_i8x16_u_i16x8(u8x16 x, u8x16 y) { - return __builtin_wasm_extmul_low_i8x16_u_i16x8(x, y); - // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16( - // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) - // WEBASSEMBLY-NEXT: ret -} - -u16x8 extmul_high_i8x16_u_i16x8(u8x16 x, u8x16 y) { - return __builtin_wasm_extmul_high_i8x16_u_i16x8(x, y); - // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16( - // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) - // WEBASSEMBLY-NEXT: ret -} - -i32x4 extmul_low_i16x8_s_i32x4(i16x8 x, i16x8 y) { - return __builtin_wasm_extmul_low_i16x8_s_i32x4(x, y); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32( - // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) - // WEBASSEMBLY-NEXT: ret -} - -i32x4 extmul_high_i16x8_s_i32x4(i16x8 x, i16x8 y) { - return __builtin_wasm_extmul_high_i16x8_s_i32x4(x, y); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32( - // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) - // WEBASSEMBLY-NEXT: ret -} - -u32x4 extmul_low_i16x8_u_i32x4(u16x8 x, u16x8 y) { - return __builtin_wasm_extmul_low_i16x8_u_i32x4(x, y); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32( - // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) - // WEBASSEMBLY-NEXT: ret -} - -u32x4 extmul_high_i16x8_u_i32x4(u16x8 x, u16x8 y) { - return __builtin_wasm_extmul_high_i16x8_u_i32x4(x, y); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32( - // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) - // WEBASSEMBLY-NEXT: ret -} - -i64x2 extmul_low_i32x4_s_i64x2(i32x4 x, i32x4 y) { - return __builtin_wasm_extmul_low_i32x4_s_i64x2(x, y); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64( - // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) - // WEBASSEMBLY-NEXT: ret -} - -i64x2 extmul_high_i32x4_s_i64x2(i32x4 x, i32x4 y) { - return __builtin_wasm_extmul_high_i32x4_s_i64x2(x, y); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64( - // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) - // WEBASSEMBLY-NEXT: ret -} - -u64x2 extmul_low_i32x4_u_i64x2(u32x4 x, u32x4 y) { - return __builtin_wasm_extmul_low_i32x4_u_i64x2(x, y); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64( - // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) - // WEBASSEMBLY-NEXT: ret -} - -u64x2 extmul_high_i32x4_u_i64x2(u32x4 x, u32x4 y) { - return __builtin_wasm_extmul_high_i32x4_u_i64x2(x, y); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64( - // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) - // WEBASSEMBLY-NEXT: ret -} - i16x8 extadd_pairwise_i8x16_s_i16x8(i8x16 v) { return __builtin_wasm_extadd_pairwise_i8x16_s_i16x8(v); // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16( Index: clang/lib/Headers/wasm_simd128.h =================================================================== --- clang/lib/Headers/wasm_simd128.h +++ clang/lib/Headers/wasm_simd128.h @@ -1455,74 +1455,74 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extmul_low_i8x16(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_low_i8x16_s_i16x8((__i8x16)__a, - (__i8x16)__b); + return (v128_t)((__i16x8)wasm_i16x8_extend_low_i8x16(__a) * + (__i16x8)wasm_i16x8_extend_low_i8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extmul_high_i8x16(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_high_i8x16_s_i16x8((__i8x16)__a, - (__i8x16)__b); + return (v128_t)((__i16x8)wasm_i16x8_extend_high_i8x16(__a) * + (__i16x8)wasm_i16x8_extend_high_i8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extmul_low_u8x16(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_low_i8x16_u_i16x8((__u8x16)__a, - (__u8x16)__b); + return (v128_t)((__u16x8)wasm_u16x8_extend_low_u8x16(__a) * + (__u16x8)wasm_u16x8_extend_low_u8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extmul_high_u8x16(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_high_i8x16_u_i16x8((__u8x16)__a, - (__u8x16)__b); + return (v128_t)((__u16x8)wasm_u16x8_extend_high_u8x16(__a) * + (__u16x8)wasm_u16x8_extend_high_u8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extmul_low_i16x8(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_low_i16x8_s_i32x4((__i16x8)__a, - (__i16x8)__b); + return (v128_t)((__i32x4)wasm_i32x4_extend_low_i16x8(__a) * + (__i32x4)wasm_i32x4_extend_low_i16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extmul_high_i16x8(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_high_i16x8_s_i32x4((__i16x8)__a, - (__i16x8)__b); + return (v128_t)((__i32x4)wasm_i32x4_extend_high_i16x8(__a) * + (__i32x4)wasm_i32x4_extend_high_i16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extmul_low_u16x8(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_low_i16x8_u_i32x4((__u16x8)__a, - (__u16x8)__b); + return (v128_t)((__u32x4)wasm_u32x4_extend_low_u16x8(__a) * + (__u32x4)wasm_u32x4_extend_low_u16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extmul_high_u16x8(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_high_i16x8_u_i32x4((__u16x8)__a, - (__u16x8)__b); + return (v128_t)((__u32x4)wasm_u32x4_extend_high_u16x8(__a) * + (__u32x4)wasm_u32x4_extend_high_u16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extmul_low_i32x4(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_low_i32x4_s_i64x2((__i32x4)__a, - (__i32x4)__b); + return (v128_t)((__i64x2)wasm_i64x2_extend_low_i32x4(__a) * + (__i64x2)wasm_i64x2_extend_low_i32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extmul_high_i32x4(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_high_i32x4_s_i64x2((__i32x4)__a, - (__i32x4)__b); + return (v128_t)((__i64x2)wasm_i64x2_extend_high_i32x4(__a) * + (__i64x2)wasm_i64x2_extend_high_i32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extmul_low_u32x4(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_low_i32x4_u_i64x2((__u32x4)__a, - (__u32x4)__b); + return (v128_t)((__u64x2)wasm_u64x2_extend_low_u32x4(__a) * + (__u64x2)wasm_u64x2_extend_low_u32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extmul_high_u32x4(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_extmul_high_i32x4_u_i64x2((__u32x4)__a, - (__u32x4)__b); + return (v128_t)((__u64x2)wasm_u64x2_extend_high_u32x4(__a) * + (__u64x2)wasm_u64x2_extend_high_u32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_q15mulr_sat(v128_t __a, Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -17727,49 +17727,6 @@ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } - case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2: - case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2: - case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2: - case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: { - Value *LHS = EmitScalarExpr(E->getArg(0)); - Value *RHS = EmitScalarExpr(E->getArg(1)); - unsigned IntNo; - switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2: - IntNo = Intrinsic::wasm_extmul_low_signed; - break; - case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2: - IntNo = Intrinsic::wasm_extmul_low_unsigned; - break; - case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2: - IntNo = Intrinsic::wasm_extmul_high_signed; - break; - case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8: - case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4: - case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: - IntNo = Intrinsic::wasm_extmul_high_unsigned; - break; - default: - llvm_unreachable("unexptected builtin ID"); - } - - Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); - return Builder.CreateCall(Callee, {LHS, RHS}); - } case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: Index: clang/include/clang/Basic/BuiltinsWebAssembly.def =================================================================== --- clang/include/clang/Basic/BuiltinsWebAssembly.def +++ clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -103,21 +103,6 @@ TARGET_BUILTIN(__builtin_wasm_q15mulr_sat_s_i16x8, "V8sV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128") - -TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128") - -TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128") - TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i8x16_s_i16x8, "V8sV16Sc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i8x16_u_i16x8, "V8UsV16Uc", "nc", "simd128")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits