tlively updated this revision to Diff 357983.
tlively added a comment.
- Check types of splats as well
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D105755/new/
https://reviews.llvm.org/D105755
Files:
clang/include/clang/Basic/BuiltinsWebAssembly.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/wasm_simd128.h
clang/test/CodeGen/builtins-wasm.c
clang/test/Headers/wasm.c
llvm/include/llvm/IR/IntrinsicsWebAssembly.td
llvm/lib/Target/WebAssembly/WebAssemblyISD.def
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-conversions.ll
llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SLOW
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s
; Test that SIMD128 intrinsics lower as expected. These intrinsics are
@@ -542,6 +542,18 @@
ret <4 x i32> %a
}
+; CHECK-LABEL: trunc_sat_zero_s_v4i32_2:
+; CHECK-NEXT: .functype trunc_sat_zero_s_v4i32_2 (v128) -> (v128){{$}}
+; SLOW-NEXT: i32x4.trunc_sat_zero_f64x2_s $push[[R:[0-9]+]]=, $0{{$}}
+; SLOW-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double>)
+define <4 x i32> @trunc_sat_zero_s_v4i32_2(<2 x double> %x) {
+ %v = shufflevector <2 x double> %x, <2 x double> zeroinitializer,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %v)
+ ret <4 x i32> %a
+}
+
; CHECK-LABEL: trunc_sat_zero_u_v4i32:
; CHECK-NEXT: .functype trunc_sat_zero_u_v4i32 (v128) -> (v128){{$}}
; CHECK-NEXT: i32x4.trunc_sat_zero_f64x2_u $push[[R:[0-9]+]]=, $0{{$}}
@@ -554,6 +566,18 @@
ret <4 x i32> %a
}
+; CHECK-LABEL: trunc_sat_zero_u_v4i32_2:
+; CHECK-NEXT: .functype trunc_sat_zero_u_v4i32_2 (v128) -> (v128){{$}}
+; SLOW-NEXT: i32x4.trunc_sat_zero_f64x2_u $push[[R:[0-9]+]]=, $0{{$}}
+; SLOW-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double>)
+define <4 x i32> @trunc_sat_zero_u_v4i32_2(<2 x double> %x) {
+ %v = shufflevector <2 x double> %x, <2 x double> zeroinitializer,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %v)
+ ret <4 x i32> %a
+}
+
; ==============================================================================
; 2 x i64
; ==============================================================================
@@ -722,16 +746,6 @@
ret <4 x float> %v
}
-; CHECK-LABEL: demote_zero_v4f32:
-; CHECK-NEXT: .functype demote_zero_v4f32 (v128) -> (v128){{$}}
-; CHECK-NEXT: f32x4.demote_zero_f64x2 $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.demote.zero(<2 x double>)
-define <4 x float> @demote_zero_v4f32(<2 x double> %a) {
- %v = call <4 x float> @llvm.wasm.demote.zero(<2 x double> %a)
- ret <4 x float> %v
-}
-
; ==============================================================================
; 2 x f64
; ==============================================================================
Index: llvm/test/CodeGen/WebAssembly/simd-conversions.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -82,6 +82,30 @@
ret <2 x i64> %a
}
+; CHECK-LABEL: demote_zero_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .functype demote_zero_v4f32 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.demote_zero_f64x2 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <4 x float> @demote_zero_v4f32(<2 x double> %x) {
+ %v = shufflevector <2 x double> %x, <2 x double> zeroinitializer,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fptrunc <4 x double> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+; CHECK-LABEL: demote_zero_v4f32_2:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .functype demote_zero_v4f32_2 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.demote_zero_f64x2 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <4 x float> @demote_zero_v4f32_2(<2 x double> %x) {
+ %v = fptrunc <2 x double> %x to <2 x float>
+ %a = shufflevector <2 x float> %v, <2 x float> zeroinitializer,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
+
; CHECK-LABEL: convert_low_s_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype convert_low_s_v2f64 (v128) -> (v128){{$}}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1289,7 +1289,9 @@
"extadd_pairwise_i16x8_u", 0x7f>;
// f64x2 <-> f32x4 conversions
-defm "" : SIMDConvert<F32x4, F64x2, int_wasm_demote_zero,
+def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
+defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
"demote_zero_f64x2", 0x5e>;
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -156,7 +156,11 @@
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
- // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
+ // Combine fp_to_{s,u}int_sat or fp_round of a zero-padded concat_vectors, or
+ // a zero-padded concat_vectors of such a conversion, into conversion ops
+ setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+ setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
+ setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
// Support saturating add for i8x16 and i16x8
@@ -2294,45 +2298,121 @@
}
static SDValue
-performVectorTruncSatLowCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto &DAG = DCI.DAG;
- assert(N->getOpcode() == ISD::CONCAT_VECTORS);
+
+ auto GetWasmConversionOp = [](unsigned Op) {
+ switch (Op) {
+ case ISD::FP_TO_SINT_SAT:
+ return WebAssemblyISD::TRUNC_SAT_ZERO_S;
+ case ISD::FP_TO_UINT_SAT:
+ return WebAssemblyISD::TRUNC_SAT_ZERO_U;
+ case ISD::FP_ROUND:
+ return WebAssemblyISD::DEMOTE_ZERO;
+ }
+ llvm_unreachable("unexpected op");
+ };
+
+ auto IsZeroSplat = [](SDValue SplatVal) {
+ auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ return Splat &&
+ Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs) &&
+ SplatValue == 0;
+ };
+
+ if (N->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine this:
+ //
+ // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
+ //
+ // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
+ //
+ // Or this:
+ //
+ // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
+ //
+ // into (f32x4.demote_zero_f64x2 $x).
+ EVT ResVT;
+ EVT ExpectedConversionType;
+ auto Conversion = N->getOperand(0);
+ auto ConversionOp = Conversion.getOpcode();
+ switch (ConversionOp) {
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ ResVT = MVT::v4i32;
+ ExpectedConversionType = MVT::v2i32;
+ break;
+ case ISD::FP_ROUND:
+ ResVT = MVT::v4f32;
+ ExpectedConversionType = MVT::v2f32;
+ break;
+ default:
+ return SDValue();
+ }
+
+ if (N->getValueType(0) != ResVT)
+ return SDValue();
+
+ if (Conversion.getValueType() != ExpectedConversionType)
+ return SDValue();
+
+ auto Source = Conversion.getOperand(0);
+ if (Source.getValueType() != MVT::v2f64)
+ return SDValue();
+
+ if (!IsZeroSplat(N->getOperand(1)) ||
+ N->getOperand(1).getValueType() != ExpectedConversionType)
+ return SDValue();
+
+ unsigned Op = GetWasmConversionOp(ConversionOp);
+ return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+ }
// Combine this:
//
- // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
+ // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
//
// into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
- EVT ResVT = N->getValueType(0);
- if (ResVT != MVT::v4i32)
- return SDValue();
+ //
+ // Or this:
+ //
+ // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
+ //
+ // into (f32x4.demote_zero_f64x2 $x).
+ EVT ResVT;
+ auto ConversionOp = N->getOpcode();
+ switch (ConversionOp) {
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ ResVT = MVT::v4i32;
+ break;
+ case ISD::FP_ROUND:
+ ResVT = MVT::v4f32;
+ break;
+ default:
+ llvm_unreachable("unexpected op");
+ }
- auto FPToInt = N->getOperand(0);
- auto FPToIntOp = FPToInt.getOpcode();
- if (FPToIntOp != ISD::FP_TO_SINT_SAT && FPToIntOp != ISD::FP_TO_UINT_SAT)
+ if (N->getValueType(0) != ResVT)
return SDValue();
- if (cast<VTSDNode>(FPToInt.getOperand(1))->getVT() != MVT::i32)
+
+ auto Concat = N->getOperand(0);
+ if (Concat.getValueType() != MVT::v4f64)
return SDValue();
- auto Source = FPToInt.getOperand(0);
+ auto Source = Concat.getOperand(0);
if (Source.getValueType() != MVT::v2f64)
return SDValue();
- auto *Splat = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- APInt SplatValue, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (!Splat || !Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs))
+ if (!IsZeroSplat(Concat.getOperand(1)) ||
+ Concat.getOperand(1).getValueType() != MVT::v2f64)
return SDValue();
- if (SplatValue != 0)
- return SDValue();
-
- unsigned Op = FPToIntOp == ISD::FP_TO_SINT_SAT
- ? WebAssemblyISD::TRUNC_SAT_ZERO_S
- : WebAssemblyISD::TRUNC_SAT_ZERO_U;
+ unsigned Op = GetWasmConversionOp(ConversionOp);
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}
@@ -2352,7 +2432,10 @@
case ISD::FP_EXTEND:
case ISD::EXTRACT_SUBVECTOR:
return performVectorConvertLowCombine(N, DCI);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_ROUND:
case ISD::CONCAT_VECTORS:
- return performVectorTruncSatLowCombine(N, DCI);
+ return performVectorTruncZeroCombine(N, DCI);
}
}
Index: llvm/lib/Target/WebAssembly/WebAssemblyISD.def
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -40,6 +40,7 @@
HANDLE_NODETYPE(PROMOTE_LOW)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
+HANDLE_NODETYPE(DEMOTE_ZERO)
HANDLE_NODETYPE(THROW)
HANDLE_NODETYPE(CATCH)
HANDLE_NODETYPE(MEMORY_COPY)
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -263,11 +263,6 @@
[LLVMSubdivide2VectorType<0>],
[IntrNoMem, IntrSpeculatable]>;
-// TODO: Remove this if possible.
-def int_wasm_demote_zero :
- Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
- [IntrNoMem, IntrSpeculatable]>;
-
//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
//===----------------------------------------------------------------------===//
Index: clang/test/Headers/wasm.c
===================================================================
--- clang/test/Headers/wasm.c
+++ clang/test/Headers/wasm.c
@@ -2465,9 +2465,10 @@
// CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.wasm.demote.zero(<2 x double> [[TMP0]]) #[[ATTR10]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
return wasm_f32x4_demote_f64x2_zero(a);
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -892,12 +892,6 @@
// WEBASSEMBLY: ret <4 x i32> %1
}
-f32x4 wasm_demote_zero_f64x2_f32x4(f64x2 x) {
- return __builtin_wasm_demote_zero_f64x2_f32x4(x);
- // WEBASSEMBLY: call <4 x float> @llvm.wasm.demote.zero(<2 x double> %x)
- // WEBASSEMBLY: ret
-}
-
i32x4 load32_zero(const int *p) {
return __builtin_wasm_load32_zero(p);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
Index: clang/lib/Headers/wasm_simd128.h
===================================================================
--- clang/lib/Headers/wasm_simd128.h
+++ clang/lib/Headers/wasm_simd128.h
@@ -1151,7 +1151,9 @@
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_f32x4_demote_f64x2_zero(v128_t __a) {
- return (v128_t)__builtin_wasm_demote_zero_f64x2_f32x4((__f64x2)__a);
+ return (v128_t) __builtin_convertvector(
+ __builtin_shufflevector((__f64x2)__a, (__f64x2){0, 0}, 0, 1, 2, 3),
+ __f32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -17723,11 +17723,6 @@
Builder.getInt32(2), Builder.getInt32(3)});
return Builder.CreateShuffleVector(Trunc, Splat, ConcatMask);
}
- case WebAssembly::BI__builtin_wasm_demote_zero_f64x2_f32x4: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_demote_zero);
- return Builder.CreateCall(Callee, Vec);
- }
case WebAssembly::BI__builtin_wasm_load32_zero: {
Value *Ptr = EmitScalarExpr(E->getArg(0));
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
Index: clang/include/clang/Basic/BuiltinsWebAssembly.def
===================================================================
--- clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -191,7 +191,6 @@
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits