================ @@ -21095,6 +21095,50 @@ static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) { return SDValue(); } +// A custom combine to lower load <3 x i8> as the more efficient sequence +// below: +// ldrb wX, [x0, #2] +// ldrh wY, [x0] +// orr wX, wY, wX, lsl #16 +// fmov s0, wX +// +static SDValue combineV3I8LoadExt(LoadSDNode *LD, SelectionDAG &DAG) { + EVT MemVT = LD->getMemoryVT(); + if (MemVT != EVT::getVectorVT(*DAG.getContext(), MVT::i8, 3) || + LD->getOriginalAlign() >= 4) + return SDValue(); + + SDLoc DL(LD); + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); ---------------- fhahn wrote:
Ah, I see, thanks! I wasn't able to construct a test case where the load would be indexed (tried in ff1cde5), but I added an assert to catch that case if it can occur. https://github.com/llvm/llvm-project/pull/78632 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits