================ @@ -4212,6 +4213,96 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc, return CNode; } +// When the consumer of a right shift (arithmetic or logical) wouldn't notice +// the difference if the instruction was a rotate right instead (because the +// bits shifted in are truncated away), the shift can be replaced by the RORX +// instruction from BMI2. This doesn't set flags and can output to a different +// register. However, this increases code size in most cases, and doesn't leave +// the high bits in a useful state. There may be other situations where this +// transformation is profitable given those conditions, but currently the +// transformation is only made when it likely avoids spilling flags. +bool X86DAGToDAGISel::rightShiftUncloberFlags(SDNode *N) { + EVT VT = N->getValueType(0); + + // Target has to have BMI2 for RORX + if (!Subtarget->hasBMI2()) + return false; + + // Only handle scalar shifts. + if (VT.isVector()) + return false; + + unsigned OpSize; + if (VT == MVT::i64) + OpSize = 64; + else if (VT == MVT::i32) + OpSize = 32; + else if (VT == MVT::i16) + OpSize = 16; + else if (VT == MVT::i8) + return false; // i8 shift can't be truncated. + else + llvm_unreachable("Unexpected shift size"); + + unsigned TruncateSize = 0; + // This only works when the result is truncated. + for (const SDNode *User : N->uses()) { + auto name = User->getOperationName(CurDAG); + if (User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) + return false; + EVT TuncateType = User->getValueType(0); + if (TuncateType == MVT::i32) + TruncateSize = std::max(TruncateSize, 32U); + else if (TuncateType == MVT::i16) + TruncateSize = std::max(TruncateSize, 16U); + else if (TuncateType == MVT::i8) + TruncateSize = std::max(TruncateSize, 8U); + else + return false; + } + if (TruncateSize >= OpSize) + return false; + + // The shift must be by an immediate that wouldn't expose the zero or sign + // extended result. 
+ auto *ShiftAmount = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!ShiftAmount || ShiftAmount->getZExtValue() > OpSize - TruncateSize) + return false; + + // Only make the replacement when it avoids clobbering used flags. This is a + // similar heuristic as used in the conversion to LEA, namely looking at the + // operand for an instruction that creates flags where those flags are used. + // This will have both false positives and false negatives. Ideally, both of + // these happen later on. Perhaps in copy to flags lowering or in register + // allocation. + bool MightClobberFlags = false; + SDNode *Input = N->getOperand(0).getNode(); + for (auto Use : Input->uses()) { + if (Use->getOpcode() == ISD::CopyToReg) { + auto RegisterNode = ---------------- topperc wrote:
Please write this as `auto *RegisterNode` to make it clear that it's a pointer.

https://github.com/llvm/llvm-project/pull/77964
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits