llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Brendan Dahl (brendandahl) <details> <summary>Changes</summary> Getting this to work required a few additional changes: - Add builtins for any instructions that can't be done with plain C currently. - Add support for the saturating versions of fp_to_sint/fp_to_uint (FP_TO_SINT_SAT/FP_TO_UINT_SAT) for i16x8. Other vector sizes supported this already. - Support bitcast of f16x8 to v128. Needed to return a __f16x8 as v128_t. --- Patch is 23.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106465.diff 7 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsWebAssembly.def (+9) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+12) - (modified) clang/lib/Headers/wasm_simd128.h (+147) - (modified) cross-project-tests/intrinsic-header-tests/wasm_simd128.c (+137-1) - (modified) llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (+8-1) - (modified) llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td (+17-11) - (modified) llvm/test/CodeGen/WebAssembly/half-precision.ll (+18) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 034d32c6291b3d..2e80eef2c8b9bc 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -124,6 +124,7 @@ TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_abs_f16x8, "V8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128") @@ -140,6 +141,10 @@ TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", 
"nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_ceil_f16x8, "V8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_floor_f16x8, "V8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_trunc_f16x8, "V8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_nearest_f16x8, "V8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128") @@ -151,9 +156,13 @@ TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_sqrt_f16x8, "V8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i16x8_f16x8, "V8sV8h", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i16x8_f16x8, "V8sV8h", "nc", "simd128") + TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2a733e4d834cfa..bb5367c29b1c3a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21208,6 +21208,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); @@ -21219,6 +21220,7 @@ Value 
*CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); @@ -21266,6 +21268,10 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_ceil_f16x8: + case WebAssembly::BI__builtin_wasm_floor_f16x8: + case WebAssembly::BI__builtin_wasm_trunc_f16x8: + case WebAssembly::BI__builtin_wasm_nearest_f16x8: case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f32x4: @@ -21276,18 +21282,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_nearest_f64x2: { unsigned IntNo; switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_ceil_f16x8: case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_ceil_f64x2: IntNo = Intrinsic::ceil; break; + case WebAssembly::BI__builtin_wasm_floor_f16x8: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_floor_f64x2: IntNo = Intrinsic::floor; break; + case WebAssembly::BI__builtin_wasm_trunc_f16x8: case WebAssembly::BI__builtin_wasm_trunc_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f64x2: IntNo = Intrinsic::trunc; break; + case WebAssembly::BI__builtin_wasm_nearest_f16x8: case WebAssembly::BI__builtin_wasm_nearest_f32x4: case WebAssembly::BI__builtin_wasm_nearest_f64x2: IntNo = Intrinsic::nearbyint; @@ -21486,12 +21496,14 @@ Value 
*CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } + case WebAssembly::BI__builtin_wasm_abs_f16x8: case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } + case WebAssembly::BI__builtin_wasm_sqrt_f16x8: case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index 2327bec52522d2..8d19609bf2168e 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -33,6 +33,7 @@ typedef unsigned long long __u64x2 __attribute__((__vector_size__(16), __aligned__(16))); typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef __fp16 __f16x8 __attribute__((__vector_size__(16), __aligned__(16))); typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8))); typedef unsigned char __u8x8 @@ -1878,6 +1879,152 @@ wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) { (__i8x16)__a, (__i8x16)__b, (__i32x4)__c); } +// FP16 intrinsics +#define __FP16_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("fp16"), \ + __min_vector_width__(128))) + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) { + return (v128_t)__builtin_wasm_splat_f16x8(__a); +} + +static __inline__ float __FP16_FN_ATTRS wasm_f16x8_extract_lane(v128_t __a, + int __i) + __REQUIRE_CONSTANT(__i) { + return __builtin_wasm_extract_lane_f16x8(__a, __i); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a, + int __i, + 
float __b) + __REQUIRE_CONSTANT(__i) { + return (v128_t)__builtin_wasm_replace_lane_f16x8(__a, __i, __b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) { + return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_neg(v128_t __a) { + return (v128_t)(-(__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sqrt(v128_t __a) { + return (v128_t)__builtin_wasm_sqrt_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ceil(v128_t __a) { + return (v128_t)__builtin_wasm_ceil_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_floor(v128_t __a) { + return (v128_t)__builtin_wasm_floor_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_trunc(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_nearest(v128_t __a) { + return (v128_t)__builtin_wasm_nearest_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_eq(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a == (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ne(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a != (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_lt(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a < (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_gt(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a > (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_le(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a <= (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ge(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a >= (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_add(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a + (__f16x8)__b); +} + +static 
__inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sub(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a - (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_mul(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a * (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_div(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a / (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_min(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_min_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_max(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_max_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmin(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmin_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmax(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmax_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS +wasm_i16x8_trunc_sat_f16x8(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_saturate_s_i16x8_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS +wasm_u16x8_trunc_sat_f16x8(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_saturate_u_i16x8_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_i16x8(v128_t __a) { + return (v128_t) __builtin_convertvector((__i16x8)__a, __f16x8); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) { + return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a, + v128_t __b, + v128_t __c) { + return (v128_t)__builtin_wasm_relaxed_madd_f16x8((__f16x8)__a, (__f16x8)__b, + (__f16x8)__c); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_nmadd(v128_t __a, + v128_t __b, + v128_t 
__c) { + return (v128_t)__builtin_wasm_relaxed_nmadd_f16x8((__f16x8)__a, (__f16x8)__b, + (__f16x8)__c); +} + // Deprecated intrinsics static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle") diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c index fb15e0143d3653..b601d90cfcc927 100644 --- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c +++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c @@ -2,7 +2,7 @@ // expected-no-diagnostics // RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \ -// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s +// RUN: -msimd128 -mrelaxed-simd -mfp16 -Wcast-qual -Werror | FileCheck %s #include <wasm_simd128.h> @@ -1385,3 +1385,139 @@ v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) { v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) { return wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c); } + +// CHECK-LABEL: test_f16x8_splat: +// CHECK: f16x8.splat{{$}} +v128_t test_f16x8_splat(float a) { return wasm_f16x8_splat(a); } + +// CHECK-LABEL: test_f16x8_extract_lane: +// CHECK: f16x8.extract_lane 7{{$}} +int16_t test_f16x8_extract_lane(v128_t a) { + return wasm_f16x8_extract_lane(a, 7); +} + +// CHECK-LABEL: test_f16x8_replace_lane: +// CHECK: f16x8.replace_lane 7{{$}} +v128_t test_f16x8_replace_lane(v128_t a, float b) { + return wasm_f16x8_replace_lane(a, 7, b); +} + +// CHECK-LABEL: test_f16x8_abs: +// CHECK: f16x8.abs{{$}} +v128_t test_f16x8_abs(v128_t a) { return wasm_f16x8_abs(a); } + +// CHECK-LABEL: test_f16x8_neg: +// CHECK: f16x8.neg{{$}} +v128_t test_f16x8_neg(v128_t a) { return wasm_f16x8_neg(a); } + +// CHECK-LABEL: test_f16x8_sqrt: +// CHECK: f16x8.sqrt{{$}} +v128_t test_f16x8_sqrt(v128_t a) { return wasm_f16x8_sqrt(a); } + +// CHECK-LABEL: test_f16x8_ceil: +// CHECK: f16x8.ceil{{$}} +v128_t test_f16x8_ceil(v128_t a) { return wasm_f16x8_ceil(a); } + +// 
CHECK-LABEL: test_f16x8_floor: +// CHECK: f16x8.floor{{$}} +v128_t test_f16x8_floor(v128_t a) { return wasm_f16x8_floor(a); } + +// CHECK-LABEL: test_f16x8_trunc: +// CHECK: f16x8.trunc{{$}} +v128_t test_f16x8_trunc(v128_t a) { return wasm_f16x8_trunc(a); } + +// CHECK-LABEL: test_f16x8_nearest: +// CHECK: f16x8.nearest{{$}} +v128_t test_f16x8_nearest(v128_t a) { return wasm_f16x8_nearest(a); } + +// CHECK-LABEL: test_f16x8_add: +// CHECK: f16x8.add{{$}} +v128_t test_f16x8_add(v128_t a, v128_t b) { return wasm_f16x8_add(a, b); } + +// CHECK-LABEL: test_f16x8_sub: +// CHECK: f16x8.sub{{$}} +v128_t test_f16x8_sub(v128_t a, v128_t b) { return wasm_f16x8_sub(a, b); } + +// CHECK-LABEL: test_f16x8_mul: +// CHECK: f16x8.mul{{$}} +v128_t test_f16x8_mul(v128_t a, v128_t b) { return wasm_f16x8_mul(a, b); } + +// CHECK-LABEL: test_f16x8_div: +// CHECK: f16x8.div{{$}} +v128_t test_f16x8_div(v128_t a, v128_t b) { return wasm_f16x8_div(a, b); } + +// CHECK-LABEL: test_f16x8_min: +// CHECK: f16x8.min{{$}} +v128_t test_f16x8_min(v128_t a, v128_t b) { return wasm_f16x8_min(a, b); } + +// CHECK-LABEL: test_f16x8_max: +// CHECK: f16x8.max{{$}} +v128_t test_f16x8_max(v128_t a, v128_t b) { return wasm_f16x8_max(a, b); } + +// CHECK-LABEL: test_f16x8_pmin: +// CHECK: f16x8.pmin{{$}} +v128_t test_f16x8_pmin(v128_t a, v128_t b) { return wasm_f16x8_pmin(a, b); } + +// CHECK-LABEL: test_f16x8_pmax: +// CHECK: f16x8.pmax{{$}} +v128_t test_f16x8_pmax(v128_t a, v128_t b) { return wasm_f16x8_pmax(a, b); } + +// CHECK-LABEL: test_f16x8_eq: +// CHECK: f16x8.eq{{$}} +v128_t test_f16x8_eq(v128_t a, v128_t b) { return wasm_f16x8_eq(a, b); } + +// CHECK-LABEL: test_f16x8_ne: +// CHECK: f16x8.ne{{$}} +v128_t test_f16x8_ne(v128_t a, v128_t b) { return wasm_f16x8_ne(a, b); } + +// CHECK-LABEL: test_f16x8_lt: +// CHECK: f16x8.lt{{$}} +v128_t test_f16x8_lt(v128_t a, v128_t b) { return wasm_f16x8_lt(a, b); } + +// CHECK-LABEL: test_f16x8_gt: +// CHECK: f16x8.gt{{$}} +v128_t test_f16x8_gt(v128_t a, v128_t 
b) { return wasm_f16x8_gt(a, b); } + +// CHECK-LABEL: test_f16x8_le: +// CHECK: f16x8.le{{$}} +v128_t test_f16x8_le(v128_t a, v128_t b) { return wasm_f16x8_le(a, b); } + +// CHECK-LABEL: test_f16x8_ge: +// CHECK: f16x8.ge{{$}} +v128_t test_f16x8_ge(v128_t a, v128_t b) { return wasm_f16x8_ge(a, b); } + +// CHECK-LABEL: test_i16x8_trunc_sat_f16x8: +// CHECK: i16x8.trunc_sat_f16x8_s{{$}} +v128_t test_i16x8_trunc_sat_f16x8(v128_t a) { + return wasm_i16x8_trunc_sat_f16x8(a); +} + +// CHECK-LABEL: test_u16x8_trunc_sat_f16x8: +// CHECK: i16x8.trunc_sat_f16x8_u{{$}} +v128_t test_u16x8_trunc_sat_f16x8(v128_t a) { + return wasm_u16x8_trunc_sat_f16x8(a); +} + +// CHECK-LABEL: test_f16x8_convert_i16x8: +// CHECK: f16x8.convert_i16x8_s{{$}} +v128_t test_f16x8_convert_i16x8(v128_t a) { + return wasm_f16x8_convert_i16x8(a); +} + +// CHECK-LABEL: test_f16x8_convert_u16x8: +// CHECK: f16x8.convert_i16x8_u{{$}} +v128_t test_f16x8_convert_u16x8(v128_t a) { + return wasm_f16x8_convert_u16x8(a); +} + +// CHECK-LABEL: test_f16x8_relaxed_madd: +// CHECK: f16x8.relaxed_madd{{$}} +v128_t test_f16x8_relaxed_madd(v128_t a, v128_t b, v128_t c) { + return wasm_f16x8_relaxed_madd(a, b, c); +} + +// CHECK-LABEL: test_f16x8_relaxed_nmadd: +// CHECK: f16x8.relaxed_nmadd{{$}} +v128_t test_f16x8_relaxed_nmadd(v128_t a, v128_t b, v128_t c) { + return wasm_f16x8_relaxed_nmadd(a, b, c); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4578ff7f715146..5cc084f3ab1387 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -275,8 +275,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(Op, T, Expand); // But saturating fp_to_int converstions are - for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) + for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) { setOperationAction(Op, MVT::v4i32, Custom); + if 
(Subtarget->hasFP16()) { + setOperationAction(Op, MVT::v8i16, Custom); + } + } // Support vector extending for (auto T : MVT::integer_fixedlen_vector_valuetypes()) { @@ -2475,6 +2479,9 @@ SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op, if (ResT == MVT::v4i32 && SatVT == MVT::i32) return Op; + if (ResT == MVT::v8i16 && SatVT == MVT::i16) + return Op; + return SDValue(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 887278e9c12ef3..da4b8d228f627d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -165,8 +165,9 @@ def F16x8 : Vec { let prefix = "f16x8"; } -// TODO: Include F16x8 here when half precision is better supported. -defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; +// TODO: Remove StdVecs when the F16x8 works every where StdVecs is used. +defvar StdVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; +defvar AllVecs = !listconcat(StdVecs, [F16x8]); defvar IntVecs = [I8x16, I16x8, I32x4, I64x2]; //===----------------------------------------------------------------------===// @@ -188,7 +189,7 @@ defm LOAD_V128_A64 : } // Def load patterns from WebAssemblyInstrMemory.td for vector types -foreach vec = AllVecs in { +foreach vec = StdVecs in { defm : LoadPat<vec.vt, load, "LOAD_V128">; } @@ -217,7 +218,7 @@ defm "" : SIMDLoadSplat<16, 8>; defm "" : SIMDLoadSplat<32, 9>; defm "" : SIMDLoadSplat<64, 10>; -foreach vec = AllVecs in { +foreach vec = StdVecs in { defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; defm : LoadPat<vec.vt, PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>, @@ -389,7 +390,7 @@ defm STORE_V128_A64 : } // Def store patterns from WebAssemblyInstrMemory.td for vector types -foreach vec = AllVecs in { +foreach vec = StdVecs in { defm : StorePat<vec.vt, store, "STORE_V128">; } @@ -513,7 +514,7 @@ defm "" : ConstVec<F64x2, "$i0, $i1">; // 
Match splat(x) -> const.v128(x, ..., x) -foreach vec = AllVecs in { +for... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/106465 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits