================ @@ -0,0 +1,2177 @@ +//===-------------------- HexagonXQFloatGenerator.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass enables generation of XQFloat instructions. XQF instructions +// are more efficient, but can be less precise in comparison to IEEE ones. +// Based on the accuracy preservation of the generated code, we enabled four +// modes - Strict IEEE-754 compliant, IEEE-754 compliant, Lossy subnormals and +// legacy mode. +// +// Strict IEEE mode adheres to similar accuracy and precision as of IEEE-754. +// +// IEEE-754 compliant mode excludes IEEE-754 overflows and lower precision +// subnormals due to larger dynamic range than IEEE-754. +// All subnormals have extra precision. +// +// Lossy subnormals mode without normalization results in a loss of accuracy. +// This provides greater precision than a clamp of subnormals to 0. +// If the dataset excludes subnormals, it behaves as IEEE-754 compliant mode. +// +// The direct mode has a loss of 1 bit of accuracy compared to IEEE-754. +// +// V79 replaces the prior internal HVX floating point format for floating-point +// arithmetic. The new internal HVX floating-point format yields results +// identical to IEEE-754 round-to-even mode. The new format contains more bits +// than IEEE-754, which optionally produces results with greater range and +// accuracy. Only the HVX vector registers use the HVX floating-point format. +// Memory maintains all floating-point data in IEEE-754 format, +// and all loads/stores use the IEEE-754 format. A subset of HVX floating-point +// operations transform IEEE-754 floating-point data to HVX floating-point data. 
+// Subsequent HVX floating-point instructions may consume operands in the HVX +// floating-point without conversion to IEEE-754, which allows for performant +// & energy efficient code. The program does not need to switch between formats +// continuously. The program must convert the HVX floating-point results to +// IEEE-754 prior to storing to memory. + +// HVX floating-point achieves IEEE-754 compliance through normalization. +// The program may skip normalization when faster calculation is desired, and +// IEEE-754 compliance isn’t required. HVX floating-point contains two input +// types: qf32, single precision floating point, and qf16, half precision +// floating point. In Hexagon, IEEE-754 contains two input types: sf, single +// precision floating point, and hf, half precision floating point. +// +// Only HVX floating-point source and destination instructions use HVX +// floating-point values. Instructions specify the HVX floating-point format +// with the qf16 and qf32 identifier. A source vector register will drop the +// extended state of a HVX floating-point value when an instruction reads the +// source vector register without the qf16 or qf32 identifier. A destination +// vector register will reset its extended state when an instruction writes to +// a vector register without the qf16 or qf32 identifier. When dropping the +// extended state, the floating-point value loses accuracy. The program may +// preserve the floating-point value by converting HVX floating-point values +// to IEEE-754 values. Compiler must convert HVX floating-point values to +// IEEE-754 values before using as an input to stores, permutes, shifts, and +// any other operations that do not source the HVX floating-point format. +// +// Depending on the desired results, HVX floating-point operations may have +// some requirements on the input sources. 
The HVX floating-point values +// require normalization to achieve IEEE-754 compliance, while faster operations +// may skip normalization. The program normalizes HVX floating-point values +// before subsequent HVX floating-point operations, so the floating-point value +// does not lose precision. The program also obtains results identical to +// IEEE-754 by converting all HVX floating-point results to IEEE-754 format +// before being consumed in any subsequent operation. There are however cases where +// this conversion is redundant, or the differences between IEEE-754 and HVX +// floating-point may not be a concern. +// +// The conversion logic can be understood by the table below: +//
+// =============================================================================================================================================================
+//             | Inputs to add/subtract            | Inputs to multiplication instructions                                 | Non-HVX floating point            |
+//             | instructions                      |                                                                       | instruction                       |
+// =============================================================================================================================================================
+//  Sources    | IEEE-754  | HVX       | HVX       | sf        | qf32      | qf32      | hf        | qf16      | qf16      | IEEE-754  | HVX       | HVX       |
+//             |           | floating  | floating  |           | from      | from      |           | from      | from      |           | floating  | floating  |
+//             |           | point     | point     |           | mult      | adder     |           | mult      | adder     |           | point     | point     |
+//             |           | from      | from      |           |           |           |           |           |           |           | from      | from      |
+//             |           | mult      | adder     |           |           |           |           |           |           |           | mult      | adder     |
+// =============================================================================================================================================================
+//  Strict     | Direct    | Convert   | Convert   | Normalize | Convert   | Convert   | widening  | Convert   | Convert   | Direct    | Convert   | Convert   |
+//  IEEE-754   | Use       | to IEEE   | to IEEE   |           | to IEEE   | to IEEE   | multiply  | to IEEE,  | to IEEE,  | use       | to IEEE   | to IEEE   |
+//  compliance |           |           |           |           | then      | then      | then      | widening  | widening  |           |           |           |
+//             |           |           |           |           | normalize | normalize | convert   | multiply, | multiply, |           |           |           |
+//             |           |           |           |           |           |           | to IEEE   | convert   | convert   |           |           |           |
+//             |           |           |           |           |           |           |           | to IEEE   | to IEEE   |           |           |           |
+// -------------------------------------------------------------------------------------------------------------------------------------------------------------
+//  IEEE-754   | Direct    | Direct    | Direct    | Normalize | Direct    | Normalize | Widening  | Direct    | Widening  | Direct    | Convert   | Convert   |
+//  compliance | Use       | Use       | Use       |           | use       |           | multiply  | use       | multiply  | use       | to IEEE   | to IEEE   |
+// -------------------------------------------------------------------------------------------------------------------------------------------------------------
+//  Lossy      | Direct    | Direct    | Direct    | Direct    | Direct    | Normalize | Direct    | Direct    | Widening  | Direct    | Convert   | Convert   |
+//  Subnormals | Use       | Use       | Use       | Use       | use       |           | use       | use       | multiply  | use       | to IEEE   | to IEEE   |
+// -------------------------------------------------------------------------------------------------------------------------------------------------------------
+//  Direct     | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    |
+//  Lossy      | Use       | Use       | Use       | Use       | use       | use       | use       | use       | use       | use       | use       | use       |
+// -------------------------------------------------------------------------------------------------------------------------------------------------------------
+// +// For v81, the normalization sequence changes. Instead of multiplying 0 +// and -0, a simple copy operation normalizes the unnormal value. Both +// qf and IEEE-754 values can be unnormal. +// Additionally for v81, we have two new vsub instructions which are handled. 
+ +#define HEXAGON_XQFLOAT_GENERATOR "XQFloat Generator pass" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "vector" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "hexagon-xqf-gen" + +using namespace llvm; + +extern cl::opt<QFloatMode> QFloatModeValue; + +// Master flag to enable XQF generations +cl::opt<bool> EnableHVXXQFloat("enable-xqf-gen", cl::init(false), + cl::desc("Enable XQFloat generations")); +// Master flag to remove extraneous qf to sf/hf conversions +cl::opt<bool> + EnableConversionsRemoval("enable-rem-conv", cl::init(false), + cl::desc("Enable extraneous conversions removal")); + +// Diagnostic flags +cl::opt<bool> PrintDebug("debug-print", cl::init(false), + cl::desc("Print function mir after transformation")); +cl::opt<bool> + EnableConvDiag("enable-diag-conv", cl::init(false), + cl::desc("Print function after conversion removal.")); + +// This vector contains the opcodes which generate qf32 from add/subtract +SmallVector<unsigned short, 7> XQFPAdd32 = { + // vector add instructions + Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32, Hexagon::V6_vadd_qf32_mix, + + // vector subtract instructions + Hexagon::V6_vsub_qf32, Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_sf, + Hexagon::V6_vsub_sf_mix}; + +// This vector contains the opcodes which generate qf16 from add/subtract +SmallVector<unsigned short, 7> XQFPAdd16 = { + // vector add instructions + Hexagon::V6_vadd_hf, 
Hexagon::V6_vadd_qf16, Hexagon::V6_vadd_qf16_mix, + + // vector subtract instructions + Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16, Hexagon::V6_vsub_qf16_mix, + Hexagon::V6_vsub_hf_mix}; + +// This vector contains the opcodes which generate qf32 from multiplication +SmallVector<unsigned short, 5> XQFPMult32 = { + Hexagon::V6_vmpy_qf32, Hexagon::V6_vmpy_qf32_qf16, Hexagon::V6_vmpy_qf32_hf, + Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32_mix_hf}; +// This vector contains the opcodes which generate qf16 from multiplication +SmallVector<unsigned short, 3> XQFPMult16 = {Hexagon::V6_vmpy_qf16, + Hexagon::V6_vmpy_qf16_hf, + Hexagon::V6_vmpy_qf16_mix_hf}; + ---------------- quic-santdas wrote:
Ideally, these should be moved to HexagonInstrInfo and queried via an instance of that class. https://github.com/llvm/llvm-project/pull/198902 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
