tlively created this revision. tlively added reviewers: aheejin, dschuff. Herald added subscribers: llvm-commits, cfe-commits, sunfish, hiraditya, jgravelle-google, sbc100. Herald added projects: clang, LLVM.
Although `__builtin_shufflevector` and the `shufflevector` instruction work fine, they are not opaque to the optimizer. As a result, DAGCombine can potentially reduce the number of shuffles and change the shuffle masks. This is unexpected behavior for users of the WebAssembly SIMD intrinsics, so this CL solves the problem by adding a new shuffle intrinsic that is opaque to the optimizer. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D66983 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -87,6 +87,36 @@ ret <16 x i8> %a } +; CHECK-LABEL: shuffle_v16i8: +; NO-SIMD128-NOT: v8x16 +; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <16 x i8> @llvm.wasm.shuffle( + <16 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32) +define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { + %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y, + i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, + i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15) + ret <16 x i8> %res +} + +; CHECK-LABEL: shuffle_undef_v16i8: +; NO-SIMD128-NOT: v8x16 +; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { + %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y, + i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, + i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, + i32 undef, i32 undef, i32 undef, i32 2) + ret <16 x i8> %res +} + ; ============================================================================== ; 8 x i16 ; ============================================================================== Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1235,6 +1235,20 @@ Op.getOperand(3) // thrown value }); } + + case Intrinsic::wasm_shuffle: { + // Drop in-chain and replace undefs, but otherwise pass through unchanged + SDValue Ops[18]; + size_t OpIdx = 0; + Ops[OpIdx++] = Op.getOperand(1); + Ops[OpIdx++] = Op.getOperand(2); + while (OpIdx < 18) { + const SDValue &MaskIdx = Op.getOperand(OpIdx + 1); + Ops[OpIdx++] = + MaskIdx.isUndef() ? DAG.getConstant(0, DL, MVT::i32) : MaskIdx; + } + return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); + } } } Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -109,6 +109,13 @@ Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_shuffle : + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Bulk memory intrinsics Index: clang/test/CodeGen/builtins-wasm.c =================================================================== --- clang/test/CodeGen/builtins-wasm.c +++ clang/test/CodeGen/builtins-wasm.c @@ -435,3 +435,13 @@ // WEBASSEMBLY: call <2 x i64> @llvm.wasm.trunc.saturate.unsigned.v2i64.v2f64(<2 x double> %f) // WEBASSEMBLY-NEXT: ret } + +i8x16 shuffle(i8x16 x, i8x16 y) { + return __builtin_wasm_shuffle_v8x16(x, y, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + // WEBASSEMBLY: call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y, + // WEBASSEMBLY-SAME: i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, + // 
WEBASSEMBLY-SAME: i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, + // WEBASSEMBLY-SAME: i32 15 + // WEBASSEMBLY-NEXT: ret +} Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -14173,7 +14173,20 @@ Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } - + case WebAssembly::BI__builtin_wasm_shuffle_v8x16: { + Value *Ops[18]; + size_t OpIdx = 0; + Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); + Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); + while (OpIdx < 18) { + llvm::APSInt LaneConst; + if (!E->getArg(OpIdx)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + } + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); + return Builder.CreateCall(Callee, Ops); + } default: return nullptr; } Index: clang/include/clang/Basic/BuiltinsWebAssembly.def =================================================================== --- clang/include/clang/Basic/BuiltinsWebAssembly.def +++ clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -113,5 +113,7 @@ TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16cV16cV16cIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128") + #undef BUILTIN #undef TARGET_BUILTIN
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits