================
@@ -3181,136 +3193,250 @@ Register SPIRVInstructionSelector::buildPointerToResource(
   return AcReg;
 }

-bool SPIRVInstructionSelector::selectFirstBitHigh16(Register ResVReg,
-                                                    const SPIRVType *ResType,
-                                                    MachineInstr &I,
-                                                    bool IsSigned) const {
-  unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
-  // zero or sign extend
+bool SPIRVInstructionSelector::selectFirstBitSet16(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    unsigned ExtendOpcode, unsigned BitSetOpcode) const {
   Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
-  bool Result =
-      selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, Opcode);
-  return Result && selectFirstBitHigh32(ResVReg, ResType, I, ExtReg, IsSigned);
+  bool Result = selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()},
+                                 ExtendOpcode);
+
+  return Result &&
+         selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode);
 }

-bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg,
-                                                    const SPIRVType *ResType,
-                                                    MachineInstr &I,
-                                                    Register SrcReg,
-                                                    bool IsSigned) const {
-  unsigned Opcode = IsSigned ? GL::FindSMsb : GL::FindUMsb;
+bool SPIRVInstructionSelector::selectFirstBitSet32(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    Register SrcReg, unsigned BitSetOpcode) const {
   return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
       .addDef(ResVReg)
       .addUse(GR.getSPIRVTypeID(ResType))
       .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
-      .addImm(Opcode)
+      .addImm(BitSetOpcode)
       .addUse(SrcReg)
       .constrainAllUses(TII, TRI, RBI);
 }

-bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg,
-                                                    const SPIRVType *ResType,
-                                                    MachineInstr &I,
-                                                    bool IsSigned) const {
-  Register OpReg = I.getOperand(2).getReg();
-  // 1. split our int64 into 2 pieces using a bitcast
-  unsigned count = GR.getScalarOrVectorComponentCount(ResType);
-  SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType);
+bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+
+  // SPIR-V only allow vecs of size 2,3,4. Calling with a larger vec requires
+  // creating a param reg and return reg with an invalid vec size. If that is
+  // resolved then this function is valid for vectors of any component size.
+  unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+  assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops");
+
+  bool ZeroAsNull = STI.isOpenCLEnv();
   MachineIRBuilder MIRBuilder(I);
-  SPIRVType *postCastT =
-      GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder);
-  Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
-  bool Result =
-      selectOpWithSrcs(bitcastReg, postCastT, I, {OpReg}, SPIRV::OpBitcast);
+  SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+  SPIRVType *I64Type = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
+  SPIRVType *I64x2Type = GR.getOrCreateSPIRVVectorType(I64Type, 2, MIRBuilder);
+  SPIRVType *Vec2ResType =
+      GR.getOrCreateSPIRVVectorType(BaseType, 2, MIRBuilder);
+
+  std::vector<Register> PartialRegs;
+
+  // Loops 0, 2, 4, ... but stops one loop early when ComponentCount is odd
+  unsigned CurrentComponent = 0;
+  for (; CurrentComponent + 1 < ComponentCount; CurrentComponent += 2) {
+    // This register holds the firstbitX result for each of the i64x2 vectors
+    // extracted from SrcReg
+    Register BitSetResult =
+        MRI->createVirtualRegister(GR.getRegClass(I64x2Type));
+
+    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                       TII.get(SPIRV::OpVectorShuffle))
+                   .addDef(BitSetResult)
+                   .addUse(GR.getSPIRVTypeID(I64x2Type))
+                   .addUse(SrcReg)
+                   // Per the spec, repeat the vector if only one vec is needed
+                   .addUse(SrcReg);
+
+    MIB.addImm(CurrentComponent);
+    MIB.addImm(CurrentComponent + 1);
+
+    if (!MIB.constrainAllUses(TII, TRI, RBI))
+      return false;
+
+    Register SubVecBitSetReg =
+        MRI->createVirtualRegister(GR.getRegClass(Vec2ResType));
+
+    if (!selectFirstBitSet64(SubVecBitSetReg, Vec2ResType, I, BitSetResult,
+                             BitSetOpcode, SwapPrimarySide))
+      return false;
+
+    PartialRegs.push_back(SubVecBitSetReg);
+  }
+
+  // On odd component counts we need to handle one more component
+  if (CurrentComponent != ComponentCount) {
+    Register FinalElemReg = MRI->createVirtualRegister(GR.getRegClass(I64Type));
+    Register ConstIntLastIdx = GR.getOrCreateConstInt(
+        ComponentCount - 1, I, BaseType, TII, ZeroAsNull);
+
+    if (!selectOpWithSrcs(FinalElemReg, I64Type, I, {SrcReg, ConstIntLastIdx},
+                          SPIRV::OpVectorExtractDynamic))
+      return false;
+
+    Register FinalElemBitSetReg =
+        MRI->createVirtualRegister(GR.getRegClass(BaseType));
+
+    if (!selectFirstBitSet64(FinalElemBitSetReg, BaseType, I, FinalElemReg,
+                             BitSetOpcode, SwapPrimarySide))
+      return false;
+
+    PartialRegs.push_back(FinalElemBitSetReg);
+  }
+
+  // Join all the resulting registers back into the return type in order
+  // (ie i32x2, i32x2, i32x1 -> i32x5)
+  return selectOpWithSrcs(ResVReg, ResType, I, PartialRegs,
+                          SPIRV::OpCompositeConstruct);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+  unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+  SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+  bool ZeroAsNull = STI.isOpenCLEnv();
+  Register ConstIntZero =
+      GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
+  Register ConstIntOne =
+      GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+
+  // SPIRV doesn't support vectors with more than 4 components. Since the
+  // algoritm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
+  // operate on vectors with 2 or less components. When largers vectors are
+  // seen. Split them, recurse, then recombine them.

----------------

s-perron wrote:
```suggestion
  // SPIRV doesn't support vectors with more than 4 components. Since the
  // algorithm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
  // operate on vectors with 2 or less components. When larger vectors are
  // seen, split them, recurse, and then recombine them.
```

https://github.com/llvm/llvm-project/pull/116858

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
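For context on the comment being corrected: the patch lowers 64-bit firstbit operations by viewing each i64 as two i32 halves, running the 32-bit FindMSB-style instruction on each half, and combining the results. Below is a minimal scalar sketch of that idea, not code from the patch; the helper names are illustrative, and it assumes GLSL findMSB semantics where -1 means no bit is set.

```cpp
#include <cstdint>
#include <cstdio>

// Index of the highest set bit in a 32-bit value, or -1 if none are set,
// mirroring GLSL findMSB / GLSL.std.450 FindUMsb semantics.
int firstBitHigh32(uint32_t V) {
  for (int Bit = 31; Bit >= 0; --Bit)
    if (V & (1u << Bit))
      return Bit;
  return -1;
}

// Build the 64-bit result from two 32-bit halves: if the high half holds the
// most significant set bit, offset its answer by 32; otherwise fall back to
// the low half. This is the scalar analogue of the i64 -> i32x2 split.
int firstBitHigh64(uint64_t V) {
  int HiMsb = firstBitHigh32(static_cast<uint32_t>(V >> 32));
  return HiMsb >= 0 ? HiMsb + 32 : firstBitHigh32(static_cast<uint32_t>(V));
}

int main() {
  std::printf("%d %d %d\n",
              firstBitHigh64(0),            // -1 (no bit set)
              firstBitHigh64(1),            // 0
              firstBitHigh64(1ull << 40));  // 40 (found in the high half)
}
```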