tlively created this revision. tlively added reviewers: aheejin, ngzhian. Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, jgravelle-google, sbc100, dschuff. tlively requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits.
Add experimental clang builtins, LLVM intrinsics, and backend definitions for the new {f32x4,f64x2}.{fma,fms} instructions in the relaxed SIMD proposal: https://github.com/WebAssembly/relaxed-simd/blob/main/proposals/relaxed-simd/Overview.md. Do not allow these instructions to be selected without explicit user opt-in. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D110295 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll llvm/test/MC/WebAssembly/simd-encodings.s
Index: llvm/test/MC/WebAssembly/simd-encodings.s =================================================================== --- llvm/test/MC/WebAssembly/simd-encodings.s +++ llvm/test/MC/WebAssembly/simd-encodings.s @@ -1,4 +1,4 @@ -# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128 < %s | FileCheck %s +# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd < %s | FileCheck %s main: .functype main () -> () @@ -779,4 +779,16 @@ # CHECK: f64x2.convert_low_i32x4_u # encoding: [0xfd,0xff,0x01] f64x2.convert_low_i32x4_u + # CHECK: f32x4.fma # encoding: [0xfd,0xaf,0x01] + f32x4.fma + + # CHECK: f32x4.fms # encoding: [0xfd,0xb0,0x01] + f32x4.fms + + # CHECK: f64x2.fma # encoding: [0xfd,0xcf,0x01] + f64x2.fma + + # CHECK: f64x2.fms # encoding: [0xfd,0xd0,0x01] + f64x2.fms + end_function Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SLOW -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+relaxed-simd | FileCheck %s --check-prefixes=CHECK,SLOW +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+relaxed-simd -fast-isel | FileCheck %s ; Test that SIMD128 intrinsics lower as expected. These intrinsics are ; only expected to lower successfully if the simd128 attribute is @@ -600,6 +600,30 @@ ret <4 x float> %v } +; CHECK-LABEL: fma_v4f32: +; CHECK-NEXT: .functype fma_v4f32 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: f32x4.fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { + %v = call <4 x float> @llvm.wasm.fma.v4f32( + <4 x float> %a, <4 x float> %b, <4 x float> %c + ) + ret <4 x float> %v +} + +; CHECK-LABEL: fms_v4f32: +; CHECK-NEXT: .functype fms_v4f32 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: f32x4.fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.fms.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @fms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { + %v = call <4 x float> @llvm.wasm.fms.v4f32( + <4 x float> %a, <4 x float> %b, <4 x float> %c + ) + ret <4 x float> %v +} + ; ============================================================================== ; 2 x f64 ; ============================================================================== @@ -674,3 +698,27 @@ %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) ret <2 x double> %v } + +; CHECK-LABEL: fma_v2f64: +; CHECK-NEXT: .functype fma_v2f64 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: f64x2.fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { + %v = call <2 x double> @llvm.wasm.fma.v2f64( + <2 x double> %a, <2 x double> %b, <2 x double> %c + ) + ret <2 x double> %v +} + +; CHECK-LABEL: fms_v2f64: +; CHECK-NEXT: .functype fms_v2f64 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: f64x2.fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.fms.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @fms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { + %v = call <2 x double> @llvm.wasm.fms.v2f64( + <2 x double> %a, <2 x double> %b, <2 x double> %c + ) + ret <2 x double> %v +} Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -12,16 +12,32 @@ //===----------------------------------------------------------------------===// // Instructions requiring HasSIMD128 and the simd128 prefix byte -multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, - list<dag> pattern_r, string asmstr_r = "", - string asmstr_s = "", bits<32> simdop = -1> { +multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, + list<dag> pattern_r, string asmstr_r, + string asmstr_s, bits<32> simdop, + Predicate simd_level> { defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s, !if(!ge(simdop, 0x100), !or(0xfd0000, !and(0xffff, simdop)), !or(0xfd00, !and(0xff, simdop)))>, - Requires<[HasSIMD128]>; + Requires<[simd_level]>; } +multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, + list<dag> pattern_r, string asmstr_r = "", + string asmstr_s = "", bits<32> simdop = -1> { + defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, + asmstr_s, simdop, HasSIMD128>; +} + +multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, + list<dag> pattern_r, string asmstr_r = "", + string asmstr_s = "", bits<32> simdop = -1> { + defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, + asmstr_s, simdop, HasRelaxedSIMD>; +} + + defm "" : ARGUMENT<V128, v16i8>; defm "" : ARGUMENT<V128, v8i16>; defm "" : ARGUMENT<V128, v4i32>; @@ -1307,3 +1323,23 @@ defm Q15MULR_SAT_S : SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>; + +//===----------------------------------------------------------------------===// +// Fused Multiply- Add and Subtract (FMA/FMS) +//===----------------------------------------------------------------------===// + +multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> { + defm FMA_#vec : + RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (int_wasm_fma + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>; + defm FMS_#vec : + RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (int_wasm_fms + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>; +} + +defm "" : SIMDFM<F32x4, 0xaf, 0xb0>; +defm "" : SIMDFM<F64x2, 0xcf, 0xd0>; Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -26,6 +26,10 @@ Predicate<"Subtarget->hasSIMD128()">, AssemblerPredicate<(all_of FeatureSIMD128), "simd128">; +def HasRelaxedSIMD : + Predicate<"Subtarget->hasRelaxedSIMD()">, + AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">; + def HasAtomics : Predicate<"Subtarget->hasAtomics()">, AssemblerPredicate<(all_of FeatureAtomics), "atomics">; Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -182,6 +182,19 @@ [LLVMSubdivide2VectorType<0>], [IntrNoMem, IntrSpeculatable]>; +//===----------------------------------------------------------------------===// +// Relaxed SIMD intrinsics (experimental) +//===----------------------------------------------------------------------===// + +def int_wasm_fma : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_fms : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Thread-local storage intrinsics //===----------------------------------------------------------------------===// Index: clang/test/CodeGen/builtins-wasm.c =================================================================== --- clang/test/CodeGen/builtins-wasm.c +++ clang/test/CodeGen/builtins-wasm.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32 -// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64 +// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32 +// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64 // RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD // SIMD convenience types @@ -676,3 +676,31 @@ // WEBASSEMBLY-SAME: i32 15 // WEBASSEMBLY-NEXT: ret } + +f32x4 fma_f32x4(f32x4 a, f32x4 b, f32x4 c) { + return __builtin_wasm_fma_f32x4(a, b, c); + // WEBASSEMBLY: call <4 x float> @llvm.wasm.fma.v4f32( + // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c) + // WEBASSEMBLY-NEXT: ret +} + +f32x4 fms_f32x4(f32x4 a, f32x4 b, f32x4 c) { + return __builtin_wasm_fms_f32x4(a, b, c); + // WEBASSEMBLY: call <4 x float> @llvm.wasm.fms.v4f32( + // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c) + // WEBASSEMBLY-NEXT: ret +} + +f64x2 fma_f64x2(f64x2 a, f64x2 b, f64x2 c) { + return __builtin_wasm_fma_f64x2(a, b, c); + // WEBASSEMBLY: call <2 x double> @llvm.wasm.fma.v2f64( + // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c) + // WEBASSEMBLY-NEXT: ret +} + +f64x2 fms_f64x2(f64x2 a, f64x2 b, f64x2 c) { + return __builtin_wasm_fms_f64x2(a, b, c); + // WEBASSEMBLY: call <2 x double> @llvm.wasm.fms.v2f64( + // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c) + // WEBASSEMBLY-NEXT: ret +} Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -18222,6 +18222,29 @@ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); return Builder.CreateCall(Callee, Ops); } + case WebAssembly::BI__builtin_wasm_fma_f32x4: + case WebAssembly::BI__builtin_wasm_fms_f32x4: + case WebAssembly::BI__builtin_wasm_fma_f64x2: + case WebAssembly::BI__builtin_wasm_fms_f64x2: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_fma_f32x4: + case WebAssembly::BI__builtin_wasm_fma_f64x2: + IntNo = Intrinsic::wasm_fma; + break; + case WebAssembly::BI__builtin_wasm_fms_f32x4: + case WebAssembly::BI__builtin_wasm_fms_f64x2: + IntNo = Intrinsic::wasm_fms; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); + return Builder.CreateCall(Callee, {A, B, C}); + } default: return nullptr; } Index: clang/include/clang/Basic/BuiltinsWebAssembly.def =================================================================== --- clang/include/clang/Basic/BuiltinsWebAssembly.def +++ clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -161,5 +161,11 @@ TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128") +// Relaxed SIMD builtins (experimental) +TARGET_BUILTIN(__builtin_wasm_fma_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_fms_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_fma_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_fms_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd") + #undef BUILTIN #undef TARGET_BUILTIN
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits