Author: Amr Hesham Date: 2025-05-30T20:13:23+02:00 New Revision: 4dcfcd3a66847e2ed377efdefa22fea9b59a8285
URL: https://github.com/llvm/llvm-project/commit/4dcfcd3a66847e2ed377efdefa22fea9b59a8285 DIFF: https://github.com/llvm/llvm-project/commit/4dcfcd3a66847e2ed377efdefa22fea9b59a8285.diff LOG: [CIR] Upstream ShuffleDynamicOp for VectorType (#141411) This change adds support for the Dynamic Shuffle op for VectorType Issue https://github.com/llvm/llvm-project/issues/136487 Added: clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir Modified: clang/include/clang/CIR/Dialect/IR/CIROps.td clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp clang/lib/CIR/Dialect/IR/CIRDialect.cpp clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h clang/test/CIR/CodeGen/vector-ext.cpp clang/test/CIR/CodeGen/vector.cpp clang/test/CIR/IR/vector.cir Removed: ################################################################################ diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 5ce03c19369cb..cfa766105812f 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2141,4 +2141,38 @@ def VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> { }]; } +//===----------------------------------------------------------------------===// +// VecShuffleDynamicOp +//===----------------------------------------------------------------------===// + +def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic", + [Pure, AllTypesMatch<["vec", "result"]>]> { + let summary = "Shuffle a vector using indices in another vector"; + let description = [{ + The `cir.vec.shuffle.dynamic` operation implements the undocumented form of + Clang's __builtin_shufflevector, where the indices of the shuffled result + can be runtime values. + + There are two input vectors, which must have the same number of elements. + The second input vector must have an integral element type. 
The elements of + the second vector are interpreted as indices into the first vector. The + result vector is constructed by taking the elements from the first input + vector from the indices indicated by the elements of the second vector. + + ```mlir + %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices + : !cir.vector<4 x !s32i> + ``` + }]; + + let arguments = (ins CIR_VectorType:$vec, IntegerVector:$indices); + let results = (outs CIR_VectorType:$result); + let assemblyFormat = [{ + $vec `:` qualified(type($vec)) `,` $indices `:` qualified(type($indices)) + attr-dict + }]; + + let hasVerifier = 1; +} + #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 058015ca55729..bdb12bf86d1bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -171,6 +171,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { return emitLoadOfLValue(e); } + mlir::Value VisitShuffleVectorExpr(ShuffleVectorExpr *e) { + if (e->getNumSubExprs() == 2) { + // The undocumented form of __builtin_shufflevector. 
+ mlir::Value inputVec = Visit(e->getExpr(0)); + mlir::Value indexVec = Visit(e->getExpr(1)); + return cgf.builder.create<cir::VecShuffleDynamicOp>( + cgf.getLoc(e->getSourceRange()), inputVec, indexVec); + } + + cgf.getCIRGenModule().errorNYI(e->getSourceRange(), + "ShuffleVectorExpr with indices"); + return {}; + } + mlir::Value VisitMemberExpr(MemberExpr *e); mlir::Value VisitInitListExpr(InitListExpr *e); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 9e2b2908b22d8..c7cc27561c87c 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1564,6 +1564,20 @@ OpFoldResult cir::VecExtractOp::fold(FoldAdaptor adaptor) { return elements[index]; } +//===----------------------------------------------------------------------===// +// VecShuffleDynamicOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::VecShuffleDynamicOp::verify() { + // The number of elements in the two input vectors must match. 
+ if (getVec().getType().getSize() != + mlir::cast<cir::VectorType>(getIndices().getType()).getSize()) { + return emitOpError() << ": the number of elements in " << getVec().getType() + << " and " << getIndices().getType() << " don't match"; + } + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d30c85d572fed..96dd0a7f6c547 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1709,7 +1709,8 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMVecCreateOpLowering, CIRToLLVMVecExtractOpLowering, CIRToLLVMVecInsertOpLowering, - CIRToLLVMVecCmpOpLowering + CIRToLLVMVecCmpOpLowering, + CIRToLLVMVecShuffleDynamicOpLowering // clang-format on >(converter, patterns.getContext()); @@ -1863,6 +1864,60 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite( + cir::VecShuffleDynamicOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + // LLVM IR does not have an operation that corresponds to this form of + // the built-in. 
+ // __builtin_shufflevector(V, I) + // is implemented as this pseudocode, where the for loop is unrolled + // and N is the number of elements: + // + // result = undef + // maskbits = NextPowerOf2(N - 1) - 1 + // masked = I & maskbits + // for (i in 0 <= i < N) + // result[i] = V[masked[i]] + mlir::Location loc = op.getLoc(); + mlir::Value input = adaptor.getVec(); + mlir::Type llvmIndexVecType = + getTypeConverter()->convertType(op.getIndices().getType()); + mlir::Type llvmIndexType = getTypeConverter()->convertType( + elementTypeIfVector(op.getIndices().getType())); + uint64_t numElements = + mlir::cast<cir::VectorType>(op.getVec().getType()).getSize(); + + uint64_t maskBits = llvm::NextPowerOf2(numElements - 1) - 1; + mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>( + loc, llvmIndexType, rewriter.getIntegerAttr(llvmIndexType, maskBits)); + mlir::Value maskVector = + rewriter.create<mlir::LLVM::UndefOp>(loc, llvmIndexVecType); + + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value idxValue = + rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); + maskVector = rewriter.create<mlir::LLVM::InsertElementOp>( + loc, maskVector, maskValue, idxValue); + } + + mlir::Value maskedIndices = rewriter.create<mlir::LLVM::AndOp>( + loc, llvmIndexVecType, adaptor.getIndices(), maskVector); + mlir::Value result = rewriter.create<mlir::LLVM::UndefOp>( + loc, getTypeConverter()->convertType(op.getVec().getType())); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = + rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); + mlir::Value indexValue = rewriter.create<mlir::LLVM::ExtractElementOp>( + loc, maskedIndices, iValue); + mlir::Value valueAtIndex = + rewriter.create<mlir::LLVM::ExtractElementOp>(loc, input, indexValue); + result = rewriter.create<mlir::LLVM::InsertElementOp>(loc, result, + valueAtIndex, iValue); + } + rewriter.replaceOp(op, result); + return mlir::success(); +} + 
std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() { return std::make_unique<ConvertCIRToLLVMPass>(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 053e77f03648e..6b8862db2c8be 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -352,6 +352,17 @@ class CIRToLLVMVecCmpOpLowering mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMVecShuffleDynamicOpLowering + : public mlir::OpConversionPattern<cir::VecShuffleDynamicOp> { +public: + using mlir::OpConversionPattern< + cir::VecShuffleDynamicOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::VecShuffleDynamicOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + } // namespace direct } // namespace cir diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index aab723f041edf..9316c0c2c61eb 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -6,6 +6,7 @@ // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG typedef int vi4 __attribute__((ext_vector_type(4))); +typedef int vi6 __attribute__((ext_vector_type(6))); typedef unsigned int uvi4 __attribute__((ext_vector_type(4))); typedef int vi3 __attribute__((ext_vector_type(3))); typedef int vi2 __attribute__((ext_vector_type(2))); @@ -988,3 +989,87 @@ void foo14() { // OGCG: %[[TMP_B:.*]] = load <4 x float>, ptr %[[VEC_B]], align 16 // OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]] // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32> + +void foo15() { + vi4 a; + vi4 b; + vi4 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[NEW_VEC:.*]] = 
cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], 
i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +void foo16() { + vi6 a; + vi6 b; + vi6 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 
3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index f5a4fcacac4d4..24a30171d59c4 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -6,6 +6,7 @@ // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG typedef int vi4 __attribute__((vector_size(16))); +typedef int vi6 __attribute__((vector_size(24))); typedef unsigned int uvi4 __attribute__((vector_size(16))); typedef float vf4 __attribute__((vector_size(16))); typedef double vd2 __attribute__((vector_size(16))); @@ -967,3 +968,87 @@ void foo14() { // 
OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]] // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32> // OGCG: store <4 x i32> %[[RES]], ptr {{.*}}, align 16 + +void foo15() { + vi4 a; + vi4 b; + vi4 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], 
splat (i32 3) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +void foo16() { + vi6 a; + vi6 b; + vi6 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// LLVM: 
%[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 diff --git a/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir 
b/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir new file mode 100644 index 0000000000000..2115e769e1773 --- /dev/null +++ b/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir @@ -0,0 +1,19 @@ +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +!s32i = !cir.int<s, 32> +!s64i = !cir.int<s, 64> + +module { + cir.func @foo() { + %1 = cir.const #cir.int<1> : !s32i + %2 = cir.const #cir.int<2> : !s32i + %3 = cir.const #cir.int<3> : !s32i + %4 = cir.const #cir.int<4> : !s32i + %vec = cir.vec.create(%1, %2, %3, %4 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i> + %indices = cir.vec.create(%1, %2 : !s32i, !s32i) : !cir.vector<2 x !s32i> + + // expected-error @below {{the number of elements in '!cir.vector<4 x !cir.int<s, 32>>' and '!cir.vector<2 x !cir.int<s, 32>>' don't match}} + %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<2 x !s32i> + cir.return + } +} diff --git a/clang/test/CIR/IR/vector.cir b/clang/test/CIR/IR/vector.cir index 6ad008e8d0e9f..a455acf92ab6f 100644 --- a/clang/test/CIR/IR/vector.cir +++ b/clang/test/CIR/IR/vector.cir @@ -165,4 +165,26 @@ cir.func @vector_compare_test() { // CHECK: cir.return // CHECK: } +cir.func @vector_shuffle_dynamic_test() { + %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] + %1 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"] + %2 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init] + %3 = cir.load align(16) %0 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> + %4 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> + %5 = cir.vec.shuffle.dynamic %3 : !cir.vector<4 x !s32i>, %4 : !cir.vector<4 x !s32i> + cir.store align(16) %5, %2 : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + cir.return +} + +// CHECK: cir.func @vector_shuffle_dynamic_test() { +// CHECK: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x 
!s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CHECK: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"] +// CHECK: %[[RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init] +// CHECK: %[[TMP_A:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CHECK: %[[TMP_B:.*]] = cir.load{{.*}} %[[VEC_B]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CHECK: %[[VEC_SHUF:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> +// CHECK: cir.store{{.*}} %[[VEC_SHUF]], %[[RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> +// CHECK: cir.return +// CHECK: } + } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits