[clang] [CIR] Upstream ShuffleDynamicOp for VectorType (PR #141411)
Amr Hesham via cfe-commits
cfe-commits at lists.llvm.org
Thu May 29 03:48:36 PDT 2025
https://github.com/AmrDeveloper updated https://github.com/llvm/llvm-project/pull/141411
>From 8e662a8ccbde1fce77399af5e7b0c24249c1bd96 Mon Sep 17 00:00:00 2001
From: AmrDeveloper <amr96 at programmer.net>
Date: Sun, 25 May 2025 17:21:34 +0200
Subject: [PATCH 1/4] [CIR] Upstream ShuffleDynamicOp for VectorType
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 33 ++++++++++++
clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 14 +++++
clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 14 +++++
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 51 ++++++++++++++++++-
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 11 ++++
clang/test/CIR/CodeGen/vector-ext.cpp | 42 +++++++++++++++
clang/test/CIR/CodeGen/vector.cpp | 43 ++++++++++++++++
clang/test/CIR/IR/vector.cir | 22 ++++++++
8 files changed, 229 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 5ce03c19369cb..a8229d4c45308 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -2141,4 +2141,37 @@ def VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> {
}];
}
+//===----------------------------------------------------------------------===//
+// VecShuffleDynamicOp
+//===----------------------------------------------------------------------===//
+
+def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic",
+ [Pure, AllTypesMatch<["vec", "result"]>]> {
+ let summary = "Shuffle a vector using indices in another vector";
+ let description = [{
+ The `cir.vec.shuffle.dynamic` operation implements the undocumented form of
+ Clang's __builtin_shufflevector, where the indices of the shuffled result
+ can be runtime values.
+
+ There are two input vectors, which must have the same number of elements.
+ The second input vector must have an integral element type. The elements of
+ the second vector are interpreted as indices into the first vector. The
+ result vector is constructed by taking the elements from the first input
+ vector from the indices indicated by the elements of the second vector.
+
+ ```mlir
+ %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<4 x !s32i>
+ ```
+ }];
+
+ let arguments = (ins CIR_VectorType:$vec, IntegerVector:$indices);
+ let results = (outs CIR_VectorType:$result);
+ let assemblyFormat = [{
+ $vec `:` qualified(type($vec)) `,` $indices `:` qualified(type($indices))
+ attr-dict
+ }];
+
+ let hasVerifier = 1;
+}
+
#endif // CLANG_CIR_DIALECT_IR_CIROPS_TD
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 058015ca55729..bdb12bf86d1bf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -171,6 +171,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
return emitLoadOfLValue(e);
}
+ mlir::Value VisitShuffleVectorExpr(ShuffleVectorExpr *e) {
+ if (e->getNumSubExprs() == 2) {
+ // The undocumented form of __builtin_shufflevector.
+ mlir::Value inputVec = Visit(e->getExpr(0));
+ mlir::Value indexVec = Visit(e->getExpr(1));
+ return cgf.builder.create<cir::VecShuffleDynamicOp>(
+ cgf.getLoc(e->getSourceRange()), inputVec, indexVec);
+ }
+
+ cgf.getCIRGenModule().errorNYI(e->getSourceRange(),
+ "ShuffleVectorExpr with indices");
+ return {};
+ }
+
mlir::Value VisitMemberExpr(MemberExpr *e);
mlir::Value VisitInitListExpr(InitListExpr *e);
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 9e2b2908b22d8..c7cc27561c87c 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1564,6 +1564,20 @@ OpFoldResult cir::VecExtractOp::fold(FoldAdaptor adaptor) {
return elements[index];
}
+//===----------------------------------------------------------------------===//
+// VecShuffleDynamicOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult cir::VecShuffleDynamicOp::verify() {
+ // The number of elements in the two input vectors must match.
+ if (getVec().getType().getSize() !=
+ mlir::cast<cir::VectorType>(getIndices().getType()).getSize()) {
+ return emitOpError() << ": the number of elements in " << getVec().getType()
+ << " and " << getIndices().getType() << " don't match";
+ }
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 8e82af7e62bc0..67590b90b0325 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1717,7 +1717,8 @@ void ConvertCIRToLLVMPass::runOnOperation() {
CIRToLLVMVecCreateOpLowering,
CIRToLLVMVecExtractOpLowering,
CIRToLLVMVecInsertOpLowering,
- CIRToLLVMVecCmpOpLowering
+ CIRToLLVMVecCmpOpLowering,
+ CIRToLLVMVecShuffleDynamicOpLowering
// clang-format on
>(converter, patterns.getContext());
@@ -1871,6 +1872,54 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite(
return mlir::success();
}
+mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite(
+ cir::VecShuffleDynamicOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ // LLVM IR does not have an operation that corresponds to this form of
+ // the built-in.
+ // __builtin_shufflevector(V, I)
+ // is implemented as this pseudocode, where the for loop is unrolled
+ // and N is the number of elements:
+ // masked = I & (N-1)
+ // for (i in 0 <= i < N)
+ // result[i] = V[masked[i]]
+ mlir::Location loc = op.getLoc();
+ mlir::Value input = adaptor.getVec();
+ mlir::Type llvmIndexVecType =
+ getTypeConverter()->convertType(op.getIndices().getType());
+ mlir::Type llvmIndexType = getTypeConverter()->convertType(
+ elementTypeIfVector(op.getIndices().getType()));
+ uint64_t numElements =
+ mlir::cast<cir::VectorType>(op.getVec().getType()).getSize();
+ mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>(
+ loc, llvmIndexType,
+ mlir::IntegerAttr::get(llvmIndexType, numElements - 1));
+ mlir::Value maskVector =
+ rewriter.create<mlir::LLVM::UndefOp>(loc, llvmIndexVecType);
+ for (uint64_t i = 0; i < numElements; ++i) {
+ mlir::Value iValue =
+ rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i);
+ maskVector = rewriter.create<mlir::LLVM::InsertElementOp>(
+ loc, maskVector, maskValue, iValue);
+ }
+ mlir::Value maskedIndices = rewriter.create<mlir::LLVM::AndOp>(
+ loc, llvmIndexVecType, adaptor.getIndices(), maskVector);
+ mlir::Value result = rewriter.create<mlir::LLVM::UndefOp>(
+ loc, getTypeConverter()->convertType(op.getVec().getType()));
+ for (uint64_t i = 0; i < numElements; ++i) {
+ mlir::Value iValue =
+ rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i);
+ mlir::Value indexValue = rewriter.create<mlir::LLVM::ExtractElementOp>(
+ loc, maskedIndices, iValue);
+ mlir::Value valueAtIndex =
+ rewriter.create<mlir::LLVM::ExtractElementOp>(loc, input, indexValue);
+ result = rewriter.create<mlir::LLVM::InsertElementOp>(loc, result,
+ valueAtIndex, iValue);
+ }
+ rewriter.replaceOp(op, result);
+ return mlir::success();
+}
+
std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
return std::make_unique<ConvertCIRToLLVMPass>();
}
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 053e77f03648e..6b8862db2c8be 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -352,6 +352,17 @@ class CIRToLLVMVecCmpOpLowering
mlir::ConversionPatternRewriter &) const override;
};
+class CIRToLLVMVecShuffleDynamicOpLowering
+ : public mlir::OpConversionPattern<cir::VecShuffleDynamicOp> {
+public:
+ using mlir::OpConversionPattern<
+ cir::VecShuffleDynamicOp>::OpConversionPattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cir::VecShuffleDynamicOp op, OpAdaptor,
+ mlir::ConversionPatternRewriter &) const override;
+};
+
} // namespace direct
} // namespace cir
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index aab723f041edf..4ff14b12b43cf 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -988,3 +988,45 @@ void foo14() {
// OGCG: %[[TMP_B:.*]] = load <4 x float>, ptr %[[VEC_B]], align 16
// OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]]
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32>
+
+void foo15() {
+ vi4 a;
+ vi4 b;
+ vi4 r = __builtin_shufflevector(a, b);
+}
+
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i>
+
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
+// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
+// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
+// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
+// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
+// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
+// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
+// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
+// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
+// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
+// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
+// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
+// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index f5a4fcacac4d4..27e669915ee60 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -967,3 +967,46 @@ void foo14() {
// OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]]
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32>
// OGCG: store <4 x i32> %[[RES]], ptr {{.*}}, align 16
+
+void foo15() {
+ vi4 a;
+ vi4 b;
+ vi4 r = __builtin_shufflevector(a, b);
+}
+
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i>
+
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
+// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
+// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
+// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
+// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
+// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
+// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
+// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
+// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
+// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
+// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
+// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
+// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
+// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+
diff --git a/clang/test/CIR/IR/vector.cir b/clang/test/CIR/IR/vector.cir
index 6ad008e8d0e9f..a455acf92ab6f 100644
--- a/clang/test/CIR/IR/vector.cir
+++ b/clang/test/CIR/IR/vector.cir
@@ -165,4 +165,26 @@ cir.func @vector_compare_test() {
// CHECK: cir.return
// CHECK: }
+cir.func @vector_shuffle_dynamic_test() {
+ %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+ %1 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
+ %2 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init]
+ %3 = cir.load align(16) %0 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+ %4 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+ %5 = cir.vec.shuffle.dynamic %3 : !cir.vector<4 x !s32i>, %4 : !cir.vector<4 x !s32i>
+ cir.store align(16) %5, %2 : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+ cir.return
+}
+
+// CHECK: cir.func @vector_shuffle_dynamic_test() {
+// CHECK: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CHECK: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
+// CHECK: %[[RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init]
+// CHECK: %[[TMP_A:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CHECK: %[[TMP_B:.*]] = cir.load{{.*}} %[[VEC_B]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CHECK: %[[VEC_SHUF:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i>
+// CHECK: cir.store{{.*}} %[[VEC_SHUF]], %[[RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+// CHECK: cir.return
+// CHECK: }
+
}
>From 77916a6311b43751be33739b7441696ecaba5b39 Mon Sep 17 00:00:00 2001
From: AmrDeveloper <amr96 at programmer.net>
Date: Wed, 28 May 2025 19:44:40 +0200
Subject: [PATCH 2/4] Add test for the verifier
---
...alid-vector-shuffle-dyn-wrong-operands.cir | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
create mode 100644 clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir
diff --git a/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir b/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir
new file mode 100644
index 0000000000000..2115e769e1773
--- /dev/null
+++ b/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module {
+ cir.func @foo() {
+ %1 = cir.const #cir.int<1> : !s32i
+ %2 = cir.const #cir.int<2> : !s32i
+ %3 = cir.const #cir.int<3> : !s32i
+ %4 = cir.const #cir.int<4> : !s32i
+ %vec = cir.vec.create(%1, %2, %3, %4 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i>
+ %indices = cir.vec.create(%1, %2 : !s32i, !s32i) : !cir.vector<2 x !s32i>
+
+ // expected-error @below {{the number of elements in '!cir.vector<4 x !cir.int<s, 32>>' and '!cir.vector<2 x !cir.int<s, 32>>' don't match}}
+ %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<2 x !s32i>
+ cir.return
+ }
+}
>From bcb66cb722c43bf3584b28f6b8b91d2113f417b2 Mon Sep 17 00:00:00 2001
From: AmrDeveloper <amr96 at programmer.net>
Date: Thu, 29 May 2025 00:19:45 +0200
Subject: [PATCH 3/4] Address code review comments
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 3 ++-
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 5 +++++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index a8229d4c45308..cfa766105812f 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -2160,7 +2160,8 @@ def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic",
vector from the indices indicated by the elements of the second vector.
```mlir
- %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<4 x !s32i>
+ %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices
+ : !cir.vector<4 x !s32i>
```
}];
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 67590b90b0325..3340969275f0e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1891,6 +1891,11 @@ mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite(
elementTypeIfVector(op.getIndices().getType()));
uint64_t numElements =
mlir::cast<cir::VectorType>(op.getVec().getType()).getSize();
+
+ if (!llvm::isPowerOf2_64(numElements))
+ return op.emitError() << "unsupported VecShuffleDynamic for VectorType "
+ "with size not power of 2";
+
mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>(
loc, llvmIndexType,
mlir::IntegerAttr::get(llvmIndexType, numElements - 1));
>From f6552364831f03ba0e431e916ffd6105105fcdc4 Mon Sep 17 00:00:00 2001
From: AmrDeveloper <amr96 at programmer.net>
Date: Thu, 29 May 2025 12:31:40 +0200
Subject: [PATCH 4/4] Update VecShuffleDynamicOp maskBits logic
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 23 +++++-----
clang/test/CIR/CodeGen/vector-ext.cpp | 43 +++++++++++++++++++
clang/test/CIR/CodeGen/vector.cpp | 42 ++++++++++++++++++
3 files changed, 97 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3340969275f0e..efa5ed5961bdd 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1880,9 +1880,12 @@ mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite(
// __builtin_shufflevector(V, I)
// is implemented as this pseudocode, where the for loop is unrolled
// and N is the number of elements:
- // masked = I & (N-1)
- // for (i in 0 <= i < N)
- // result[i] = V[masked[i]]
+ //
+ // result = undef
+ // maskbits = NextPowerOf2(N - 1)
+ // masked = I & maskbits
+ // for (i in 0 <= i < N)
+ // result[i] = V[masked[i]]
mlir::Location loc = op.getLoc();
mlir::Value input = adaptor.getVec();
mlir::Type llvmIndexVecType =
@@ -1892,21 +1895,19 @@ mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite(
uint64_t numElements =
mlir::cast<cir::VectorType>(op.getVec().getType()).getSize();
- if (!llvm::isPowerOf2_64(numElements))
- return op.emitError() << "unsupported VecShuffleDynamic for VectorType "
- "with size not power of 2";
-
+ uint64_t maskBits = llvm::NextPowerOf2(numElements - 1) - 1;
mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>(
- loc, llvmIndexType,
- mlir::IntegerAttr::get(llvmIndexType, numElements - 1));
+ loc, llvmIndexType, rewriter.getIntegerAttr(llvmIndexType, maskBits));
mlir::Value maskVector =
rewriter.create<mlir::LLVM::UndefOp>(loc, llvmIndexVecType);
+
for (uint64_t i = 0; i < numElements; ++i) {
- mlir::Value iValue =
+ mlir::Value idxValue =
rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i);
maskVector = rewriter.create<mlir::LLVM::InsertElementOp>(
- loc, maskVector, maskValue, iValue);
+ loc, maskVector, maskValue, idxValue);
}
+
mlir::Value maskedIndices = rewriter.create<mlir::LLVM::AndOp>(
loc, llvmIndexVecType, adaptor.getIndices(), maskVector);
mlir::Value result = rewriter.create<mlir::LLVM::UndefOp>(
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index 4ff14b12b43cf..9316c0c2c61eb 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -6,6 +6,7 @@
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
typedef int vi4 __attribute__((ext_vector_type(4)));
+typedef int vi6 __attribute__((ext_vector_type(6)));
typedef unsigned int uvi4 __attribute__((ext_vector_type(4)));
typedef int vi3 __attribute__((ext_vector_type(3)));
typedef int vi2 __attribute__((ext_vector_type(2)));
@@ -1030,3 +1031,45 @@ void foo15() {
// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+
+void foo16() {
+ vi6 a;
+ vi6 b;
+ vi6 r = __builtin_shufflevector(a, b);
+}
+
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
+// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i>
+
+// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
+// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
+// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
+// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
+// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
+// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
+// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+
+// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
+// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
+// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
+// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
+// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
+// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
+// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index 27e669915ee60..24a30171d59c4 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -6,6 +6,7 @@
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
typedef int vi4 __attribute__((vector_size(16)));
+typedef int vi6 __attribute__((vector_size(24)));
typedef unsigned int uvi4 __attribute__((vector_size(16)));
typedef float vf4 __attribute__((vector_size(16)));
typedef double vd2 __attribute__((vector_size(16)));
@@ -1010,3 +1011,44 @@ void foo15() {
// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+void foo16() {
+ vi6 a;
+ vi6 b;
+ vi6 r = __builtin_shufflevector(a, b);
+}
+
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
+// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i>
+
+// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
+// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
+// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
+// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
+// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
+// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
+// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
+
+// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
+// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
+// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
+// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
+// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
+// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
+// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
+// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
+// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
+// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
+// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
+// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
+// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
+// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
More information about the cfe-commits
mailing list