https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/109164
>From c70d421c42417a3f1a266907ef36057e51183d97 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <pauls...@linux.ibm.com>
Date: Tue, 17 Sep 2024 19:34:34 +0200
Subject: [PATCH] Initial experiments

---
 clang/lib/Basic/Targets/SystemZ.h             |   9 +
 .../Target/SystemZ/SystemZISelLowering.cpp    |   7 +
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |   1 +
 llvm/test/CodeGen/SystemZ/fp-half.ll          | 158 ++++++++++++++++++
 4 files changed, 175 insertions(+)
 create mode 100644 llvm/test/CodeGen/SystemZ/fp-half.ll

diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index f05ea473017bec..6566b63d4587ee 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -91,11 +91,20 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
                       "-v128:64-a:8:16-n32:64");
     }
     MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;
+
+    HasLegalHalfType = false;   // Default=false
+    HalfArgsAndReturns = false; // Default=false
+    HasFloat16 = true;          // Default=false
+
     HasStrictFP = true;
   }

   unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override;

+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 582a8c139b2937..fd3dcebba1eca7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -704,6 +704,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BITCAST, MVT::f32, Custom);
   }

+  // Expand FP16 <=> FP32 conversions to libcalls and handle FP16 loads and
+  // stores in GPRs.
+  setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
   // VASTART and VACOPY need to deal with the SystemZ-specific varargs
   // structure, but VAEND is a no-op.
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4a18bde00a0b98..682cfb94f540d6 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -476,6 +476,7 @@ class SystemZTargetLowering : public TargetLowering {
     // LD, and having the full constant in memory enables reg/mem opcodes.
     return VT != MVT::f64;
   }
+  bool softPromoteHalfType() const override { return true; }
   bool hasInlineStackProbe(const MachineFunction &MF) const override;
   AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override;
   AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override;
diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll
new file mode 100644
index 00000000000000..5853dbc9184048
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-half.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+;
+; Tests for FP16 (Half).
+
+; A function where everything is done in Half.
+define void @fun0(ptr %Op0, ptr %Op1, ptr %Dst) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -168
+; CHECK-NEXT:    .cfi_def_cfa_offset 328
+; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset %f8, -168
+; CHECK-NEXT:    llgh %r0, 0(%r3)
+; CHECK-NEXT:    llgh %r13, 0(%r2)
+; CHECK-NEXT:    lgr %r12, %r4
+; CHECK-NEXT:    lgr %r2, %r0
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    lgr %r2, %r13
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    aebr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee@PLT
+; CHECK-NEXT:    sth %r2, 0(%r12)
+; CHECK-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT:    lmg %r12, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = load half, ptr %Op0, align 2
+  %1 = load half, ptr %Op1, align 2
+  %add = fadd half %0, %1
+  store half %add, ptr %Dst, align 2
+  ret void
+}
+
+; A function where Half values are loaded and extended to float and then
+; operated on.
+define void @fun1(ptr %Op0, ptr %Op1, ptr %Dst) {
+; CHECK-LABEL: fun1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -168
+; CHECK-NEXT:    .cfi_def_cfa_offset 328
+; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset %f8, -168
+; CHECK-NEXT:    llgh %r2, 0(%r2)
+; CHECK-NEXT:    lgr %r13, %r4
+; CHECK-NEXT:    lgr %r12, %r3
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    llgh %r2, 0(%r12)
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    aebr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee@PLT
+; CHECK-NEXT:    sth %r2, 0(%r13)
+; CHECK-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT:    lmg %r12, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = load half, ptr %Op0, align 2
+  %ext = fpext half %0 to float
+  %1 = load half, ptr %Op1, align 2
+  %ext1 = fpext half %1 to float
+  %add = fadd float %ext, %ext1
+  %res = fptrunc float %add to half
+  store half %res, ptr %Dst, align 2
+  ret void
+}
+
+; Test case with a Half incoming argument.
+define zeroext i1 @fun2(half noundef %f) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    llgfr %r2, %r2
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    larl %r1, .LCPI2_0
+; CHECK-NEXT:    deb %f0, 0(%r1)
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee@PLT
+; CHECK-NEXT:    risbg %r2, %r2, 63, 191, 49
+; CHECK-NEXT:    lmg %r14, %r15, 272(%r15)
+; CHECK-NEXT:    br %r14
+start:
+  %self = fdiv half %f, 0xHC700
+  %_4 = bitcast half %self to i16
+  %_0 = icmp slt i16 %_4, 0
+  ret i1 %_0
+}
+
+; Test a chain of Half operations which should have each operation surrounded
+; by conversions to/from fp32 to properly emulate Half operations.
+define void @fun3(ptr %Op0, ptr %Op1, ptr %Op2, ptr %Op3, ptr %Dst) {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT:    .cfi_offset %r11, -72
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -168
+; CHECK-NEXT:    .cfi_def_cfa_offset 328
+; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset %f8, -168
+; CHECK-NEXT:    llgh %r0, 0(%r3)
+; CHECK-NEXT:    llgh %r13, 0(%r2)
+; CHECK-NEXT:    lgr %r12, %r5
+; CHECK-NEXT:    lgr %r11, %r4
+; CHECK-NEXT:    lgr %r2, %r0
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    lgr %r2, %r13
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    aebr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee@PLT
+; CHECK-NEXT:    llgh %r13, 0(%r11)
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    lgr %r2, %r13
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    aebr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee@PLT
+; CHECK-NEXT:    llgh %r13, 0(%r12)
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    lgr %r2, %r13
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee@PLT
+; CHECK-NEXT:    sebr %f8, %f0
+; CHECK-NEXT:    ler %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee@PLT
+; CHECK-NEXT:    sth %r2, 0(%r6)
+; CHECK-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT:    lmg %r11, %r15, 256(%r15)
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = load half, ptr %Op0, align 2
+  %1 = load half, ptr %Op1, align 2
+  %add = fadd half %0, %1
+  %2 = load half, ptr %Op2, align 2
+  %add1 = fadd half %add, %2
+  %3 = load half, ptr %Op3, align 2
+  %sub = fsub half %add1, %3
+  store half %sub, ptr %Dst, align 2
+  ret void
+}
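For reference, here is a minimal C sketch (hypothetical, not part of the patch) of the kind of source this lowering is meant to handle. With HasFloat16 enabled, clang accepts _Float16 on SystemZ, and since softPromoteHalfType() returns true the backend keeps half values in GPRs and emulates each operation in float through the __gnu_h2f_ieee / __gnu_f2h_ieee libcalls, as checked by @fun0/@fun1 in fp-half.ll above:

  /* Hypothetical example; file name and flags are illustrative, e.g.:
   *   clang --target=s390x-linux-gnu -O2 -S fp16-demo.c
   * The addition below is expected to lower to __gnu_h2f_ieee calls on both
   * operands, a float aebr, and a __gnu_f2h_ieee call on the result. */
  _Float16 add_f16(const _Float16 *a, const _Float16 *b) {
    return *a + *b;
  }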