[llvm] ee35e34 - [ConstantFolding] Add folding for [de]interleave2, insert and extract (#141301)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 16:00:32 PDT 2025
Author: Nikolay Panchenko
Date: 2025-06-11T19:00:29-04:00
New Revision: ee35e342945d6825c9b2b004fd135cf16c84ea0e
URL: https://github.com/llvm/llvm-project/commit/ee35e342945d6825c9b2b004fd135cf16c84ea0e
DIFF: https://github.com/llvm/llvm-project/commit/ee35e342945d6825c9b2b004fd135cf16c84ea0e.diff
LOG: [ConstantFolding] Add folding for [de]interleave2, insert and extract (#141301)
The change adds folding for 4 vector intrinsics: `interleave2`,
`deinterleave2`, `vector_extract` and `vector_insert`. For the last 2
intrinsics the change does not use `ShuffleVector` fold mechanism as
it's much simpler to construct result vector explicitly.
Added:
llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
Modified:
llvm/lib/Analysis/ConstantFolding.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 1ef0badd23757..139a0b81e299b 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1635,6 +1635,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_extract:
+ case Intrinsic::vector_insert:
+ case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_deinterleave2:
// Target intrinsics
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
@@ -3758,6 +3762,72 @@ static Constant *ConstantFoldFixedVectorCall(
}
return nullptr;
}
+ case Intrinsic::vector_extract: {
+ auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
+ Constant *Vec = Operands[0];
+ if (!Idx || !isa<FixedVectorType>(Vec->getType()))
+ return nullptr;
+
+ unsigned NumElements = FVTy->getNumElements();
+ unsigned VecNumElements =
+ cast<FixedVectorType>(Vec->getType())->getNumElements();
+ unsigned StartingIndex = Idx->getZExtValue();
+
+ // Extracting entire vector is nop
+ if (NumElements == VecNumElements && StartingIndex == 0)
+ return Vec;
+
+ for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
+ ++I) {
+ Constant *Elt = Vec->getAggregateElement(I);
+ if (!Elt)
+ return nullptr;
+ Result[I - StartingIndex] = Elt;
+ }
+
+ return ConstantVector::get(Result);
+ }
+ case Intrinsic::vector_insert: {
+ Constant *Vec = Operands[0];
+ Constant *SubVec = Operands[1];
+ auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
+ if (!Idx || !isa<FixedVectorType>(Vec->getType()))
+ return nullptr;
+
+ unsigned SubVecNumElements =
+ cast<FixedVectorType>(SubVec->getType())->getNumElements();
+ unsigned VecNumElements =
+ cast<FixedVectorType>(Vec->getType())->getNumElements();
+ unsigned IdxN = Idx->getZExtValue();
+ // Replacing entire vector with a subvec is nop
+ if (SubVecNumElements == VecNumElements && IdxN == 0)
+ return SubVec;
+
+ for (unsigned I = 0; I < VecNumElements; ++I) {
+ Constant *Elt;
+ if (I < IdxN + SubVecNumElements)
+ Elt = SubVec->getAggregateElement(I - IdxN);
+ else
+ Elt = Vec->getAggregateElement(I);
+ if (!Elt)
+ return nullptr;
+ Result[I] = Elt;
+ }
+ return ConstantVector::get(Result);
+ }
+ case Intrinsic::vector_interleave2: {
+ unsigned NumElements =
+ cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
+ for (unsigned I = 0; I < NumElements; ++I) {
+ Constant *Elt0 = Operands[0]->getAggregateElement(I);
+ Constant *Elt1 = Operands[1]->getAggregateElement(I);
+ if (!Elt0 || !Elt1)
+ return nullptr;
+ Result[2 * I] = Elt0;
+ Result[2 * I + 1] = Elt1;
+ }
+ return ConstantVector::get(Result);
+ }
default:
break;
}
@@ -3919,6 +3989,33 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
return nullptr;
return ConstantStruct::get(StTy, SinResult, CosResult);
}
+ case Intrinsic::vector_deinterleave2: {
+ auto *Vec = dyn_cast<Constant>(Operands[0]);
+ if (!Vec)
+ return nullptr;
+
+ auto *VecTy = cast<VectorType>(Vec->getType());
+ unsigned NumElements = VecTy->getElementCount().getKnownMinValue() / 2;
+ if (isa<ConstantAggregateZero>(Vec)) {
+ auto *HalfVecTy = VectorType::getHalfElementsVectorType(VecTy);
+ return ConstantStruct::get(StTy, ConstantAggregateZero::get(HalfVecTy),
+ ConstantAggregateZero::get(HalfVecTy));
+ }
+ if (isa<FixedVectorType>(Vec->getType())) {
+ SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ Constant *Elt0 = Vec->getAggregateElement(2 * I);
+ Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
+ if (!Elt0 || !Elt1)
+ return nullptr;
+ Res0[I] = Elt0;
+ Res1[I] = Elt1;
+ }
+ return ConstantStruct::get(StTy, ConstantVector::get(Res0),
+ ConstantVector::get(Res1));
+ }
+ return nullptr;
+ }
default:
// TODO: Constant folding of vector intrinsics that fall through here does
// not work (e.g. overflow intrinsics)
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
new file mode 100644
index 0000000000000..9dbe3d4e50ee1
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s
+
+define <3 x i32> @fold_vector_extract() {
+; CHECK-LABEL: define <3 x i32> @fold_vector_extract() {
+; CHECK-NEXT: ret <3 x i32> <i32 3, i32 4, i32 5>
+;
+ %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 3)
+ ret <3 x i32> %1
+}
+
+ at a = external global i16, align 1
+
+define <3 x i32> @fold_vector_extract_constexpr() {
+; CHECK-LABEL: define <3 x i32> @fold_vector_extract_constexpr() {
+; CHECK-NEXT: ret <3 x i32> <i32 ptrtoint (ptr @a to i32), i32 1, i32 2>
+;
+ %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 ptrtoint (ptr @a to i32), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
+ ret <3 x i32> %1
+}
+
+define <8 x i32> @fold_vector_extract_nop() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() {
+; CHECK-NEXT: ret <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+;
+ %1 = call <8 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_insert() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_insert() {
+; CHECK-NEXT: ret <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 5, i32 6, i32 7, i32 8>
+;
+ %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_insert_nop() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_insert_nop() {
+; CHECK-NEXT: ret <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+;
+ %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_interleave2() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() {
+; CHECK-NEXT: ret <8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>
+;
+ %1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>)
+ ret <8 x i32> %1
+}
+
+define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() {
+; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8> }
+;
+ %1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>)
+ ret {<4 x i32>, <4 x i32>} %1
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @fold_scalable_vector_deinterleave2() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave2() {
+; CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+ %1 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<vscale x 8 x i32> zeroinitializer)
+ ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %1
+}
More information about the llvm-commits
mailing list