[llvm-branch-commits] [clang] 57d83c3 - [PowerPC] Enable paired vector type and intrinsics when MMA is disabled
Author: Baptiste Saleil Date: 2020-12-15T15:14:11-06:00 New Revision: 57d83c3a90c427ad0975803feb5b348d1ad34e29 URL: https://github.com/llvm/llvm-project/commit/57d83c3a90c427ad0975803feb5b348d1ad34e29 DIFF: https://github.com/llvm/llvm-project/commit/57d83c3a90c427ad0975803feb5b348d1ad34e29.diff LOG: [PowerPC] Enable paired vector type and intrinsics when MMA is disabled This patch enables the Clang type __vector_pair and its associated LLVM intrinsics even when MMA is disabled. With this patch, the type is now controlled by the PPC paired-vector-memops option. The builtins and intrinsics will be renamed to drop the mma prefix in another patch. Differential Revision: https://reviews.llvm.org/D91819 Added: clang/test/AST/ast-dump-ppc-types.c llvm/test/CodeGen/PowerPC/paired-vector-intrinsics-without-mma.ll Modified: clang/include/clang/AST/ASTContext.h clang/include/clang/AST/Type.h clang/include/clang/AST/TypeProperties.td clang/include/clang/Basic/PPCTypes.def clang/include/clang/Serialization/ASTBitCodes.h clang/lib/AST/ASTContext.cpp clang/lib/AST/ASTImporter.cpp clang/lib/AST/ExprConstant.cpp clang/lib/AST/ItaniumMangle.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/AST/NSAPI.cpp clang/lib/AST/PrintfFormatString.cpp clang/lib/AST/Type.cpp clang/lib/AST/TypeLoc.cpp clang/lib/CodeGen/CGDebugInfo.cpp clang/lib/CodeGen/CodeGenTypes.cpp clang/lib/CodeGen/ItaniumCXXABI.cpp clang/lib/Index/USRGeneration.cpp clang/lib/Sema/Sema.cpp clang/lib/Sema/SemaChecking.cpp clang/lib/Sema/SemaExpr.cpp clang/lib/Serialization/ASTCommon.cpp clang/lib/Serialization/ASTReader.cpp clang/tools/libclang/CIndex.cpp llvm/lib/Target/PowerPC/PPCInstrPrefix.td Removed: clang/test/AST/ast-dump-ppc-mma-types.c diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 71f824b69bc8..ff84eb52e96e 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1007,7 +1007,7 @@ class ASTContext : public RefCountedBase<ASTContext> { #define 
SVE_TYPE(Name, Id, SingletonId) \ CanQualType SingletonId; #include "clang/Basic/AArch64SVEACLETypes.def" -#define PPC_MMA_VECTOR_TYPE(Name, Id, Size) \ +#define PPC_VECTOR_TYPE(Name, Id, Size) \ CanQualType Id##Ty; #include "clang/Basic/PPCTypes.def" diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 99cfa3ae76f5..945ea7a600c0 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2489,7 +2489,7 @@ class BuiltinType : public Type { #define SVE_TYPE(Name, Id, SingletonId) Id, #include "clang/Basic/AArch64SVEACLETypes.def" // PPC MMA Types -#define PPC_MMA_VECTOR_TYPE(Name, Id, Size) Id, +#define PPC_VECTOR_TYPE(Name, Id, Size) Id, #include "clang/Basic/PPCTypes.def" // All other builtin types #define BUILTIN_TYPE(Id, SingletonId) Id, diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index b582395c44a6..ffcc8290938f 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -765,7 +765,7 @@ let Class = BuiltinType in { case BuiltinType::ID: return ctx.SINGLETON_ID; #include "clang/Basic/AArch64SVEACLETypes.def" -#define PPC_MMA_VECTOR_TYPE(NAME, ID, SIZE) \ +#define PPC_VECTOR_TYPE(NAME, ID, SIZE) \ case BuiltinType::ID: return ctx.ID##Ty; #include "clang/Basic/PPCTypes.def" diff --git a/clang/include/clang/Basic/PPCTypes.def b/clang/include/clang/Basic/PPCTypes.def index 86656f3568f8..9e2cb2aedc9f 100644 --- a/clang/include/clang/Basic/PPCTypes.def +++ b/clang/include/clang/Basic/PPCTypes.def @@ -7,14 +7,32 @@ //===--===// // // This file defines PPC types. -// Custom code should define this macro: +// Custom code should define one of these macros: // -//PPC_MMA_VECTOR_TYPE(Name, Id, Size) - A MMA vector type of a given size +//PPC_VECTOR_TYPE(Name, Id, Size) - A PPC vector type of a given size //(in bits). // +//PPC_VECTOR_MMA_TYPE(Name, Id, Size) - A PPC MMA vector type of a given +//size (in bits). 
+// +//PPC_VECTOR_VSX_TYPE(Name, Id, Size) - A PPC VSX vector type of a given +//size (in bits). +// //===--===// -PPC_MMA_VECTOR_TYPE(__vector_quad, VectorQuad, 512) -PPC_MMA_VECTOR_TYPE(__vector_pair, VectorPair, 256) +#if defined(PPC_VECTOR_TYPE) + #define PPC_VECTOR_MMA_TYPE(Name, Id, Size) PPC_VECTOR_TYPE(Name, Id, Size) + #define PPC_VECTOR_VSX_TYPE(Name, Id, Size) PPC_VECTOR_TYPE(Name, Id, Size) +#elif defined(PPC_VECTOR_MMA_TYPE) + #define PPC_VECTOR_VSX_TYPE(Name, Id, Size) +#elif defined(PPC_VECTOR_VSX_TYPE) + #define PPC_VECTOR_MMA_TYPE(Name, Id, Size)
[llvm-branch-commits] [llvm] 45ec3a3 - [PowerPC] Fix for excessive ACC copies due to PHI nodes
Author: Baptiste Saleil Date: 2020-12-03T09:51:23-06:00 New Revision: 45ec3a37b0a54e34e8f47cdac2be495838f93675 URL: https://github.com/llvm/llvm-project/commit/45ec3a37b0a54e34e8f47cdac2be495838f93675 DIFF: https://github.com/llvm/llvm-project/commit/45ec3a37b0a54e34e8f47cdac2be495838f93675.diff LOG: [PowerPC] Fix for excessive ACC copies due to PHI nodes When using accumulators in loops, they are passed around in PHI nodes of unprimed accumulators, causing the generation of additional prime/unprime instructions. This patch detects these cases and changes these PHI nodes to primed accumulator PHI nodes. We also add IR and MIR test cases for several PHI node cases. Differential Revision: https://reviews.llvm.org/D91391 Added: llvm/test/CodeGen/PowerPC/mma-phi-accs.ll llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir Modified: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 827d3c4693b9..633f216388d0 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -267,6 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves( TOCSaves[MI] = Keep; } +// This function returns a list of all PHI nodes in the tree starting from +// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes. +// The list initially only contains the root PHI. When we visit a PHI node, we +// add it to the list. We continue to look for other PHI node operands while +// there are nodes to visit in the list. The function returns false if the +// optimization cannot be applied on this tree. 
+static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI, + MachineInstr *RootPHI, + SmallVectorImpl<MachineInstr *> &PHIs) { + PHIs.push_back(RootPHI); + unsigned VisitedIndex = 0; + while (VisitedIndex < PHIs.size()) { +MachineInstr *VisitedPHI = PHIs[VisitedIndex]; +for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands(); + PHIOp != NumOps; PHIOp += 2) { + Register RegOp = VisitedPHI->getOperand(PHIOp).getReg(); + if (!Register::isVirtualRegister(RegOp)) +return false; + MachineInstr *Instr = MRI->getVRegDef(RegOp); + // While collecting the PHI nodes, we check if they can be converted (i.e. + // all the operands are either copies, implicit defs or PHI nodes). + unsigned Opcode = Instr->getOpcode(); + if (Opcode == PPC::COPY) { +Register Reg = Instr->getOperand(1).getReg(); +if (!Register::isVirtualRegister(Reg) || +MRI->getRegClass(Reg) != &PPC::ACCRCRegClass) + return false; + } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI) +return false; + // If we detect a cycle in the PHI nodes, we exit. It would be + // possible to change cycles as well, but that would add a lot + // of complexity for a case that is unlikely to occur with MMA + // code. + if (Opcode != PPC::PHI) +continue; + if (std::find(PHIs.begin(), PHIs.end(), Instr) != PHIs.end()) +return false; + PHIs.push_back(Instr); +} +VisitedIndex++; + } + return true; +} + +// This function changes the unprimed accumulator PHI nodes in the PHIs list to +// primed accumulator PHI nodes. The list is traversed in reverse order to +// change all the PHI operands of a PHI node before changing the node itself. +// We keep a map to associate each changed PHI node to its non-changed form. 
+static void convertUnprimedAccPHIs(const PPCInstrInfo *TII, + MachineRegisterInfo *MRI, + SmallVectorImpl<MachineInstr *> &PHIs, + Register Dst) { + DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap; + for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) { +MachineInstr *PHI = *It; +SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps; +// We check if the current PHI node can be changed by looking at its +// operands. If all the operands are either copies from primed +// accumulators, implicit definitions or other unprimed accumulator +// PHI nodes, we change it. +for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; + PHIOp += 2) { + Register RegOp = PHI->getOperand(PHIOp).getReg(); + MachineInstr *PHIInput = MRI->getVRegDef(RegOp); + unsigned Opcode = PHIInput->getOpcode(); + assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF || + Opcode == PPC::PHI) && + "Unexpected instruction"); + if (Opcode == PPC::COPY) { +assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) == + &PPC::ACCRCRegClass && + "Unexpected register class"); +PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp +