[llvm] [SandboxVec][BottomUpVec] Implement packing of vectors (PR #116447)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 21:58:07 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: vporpo (vporpo)
<details>
<summary>Changes</summary>
Until now, `BottomUpVec::createPack()` could only pack scalar elements. This patch adds support for packing vector elements by generating an extractelement/insertelement instruction pair for each lane of the vector element.
---
Full diff: https://github.com/llvm/llvm-project/pull/116447.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp (+23-3)
- (modified) llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll (+23)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index 4b82fea2aa2b22..a2ea11be59b8ed 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -181,12 +181,32 @@ Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack) {
// An element can be either scalar or vector. We need to generate different
// IR for each case.
if (Elm->getType()->isVectorTy()) {
- llvm_unreachable("Unimplemented");
+ unsigned NumElms =
+ cast<FixedVectorType>(Elm->getType())->getNumElements();
+ for (auto ExtrLane : seq<int>(0, NumElms)) {
+ // We generate extract-insert pairs, for each lane in `Elm`.
+ Constant *ExtrLaneC =
+ ConstantInt::getSigned(Type::getInt32Ty(Ctx), ExtrLane);
+ // This may return a Constant if Elm is a Constant.
+ auto *ExtrI =
+ ExtractElementInst::create(Elm, ExtrLaneC, WhereIt, Ctx, "VPack");
+ if (!isa<Constant>(ExtrI))
+ WhereIt = std::next(cast<Instruction>(ExtrI)->getIterator());
+ Constant *InsertLaneC =
+ ConstantInt::getSigned(Type::getInt32Ty(Ctx), InsertIdx++);
+ // This may also return a Constant if ExtrI is a Constant.
+ auto *InsertI = InsertElementInst::create(
+ LastInsert, ExtrI, InsertLaneC, WhereIt, Ctx, "VPack");
+ if (!isa<Constant>(InsertI)) {
+ LastInsert = InsertI;
+ WhereIt = std::next(cast<Instruction>(LastInsert)->getIterator());
+ }
+ }
} else {
Constant *InsertLaneC =
ConstantInt::getSigned(Type::getInt32Ty(Ctx), InsertIdx++);
- // This may be folded into a Constant if LastInsert is a Constant. In that
- // case we only collect the last constant.
+ // This may be folded into a Constant if LastInsert is a Constant. In
+ // that case we only collect the last constant.
LastInsert = InsertElementInst::create(LastInsert, Elm, InsertLaneC,
WhereIt, Ctx, "Pack");
if (auto *NewI = dyn_cast<Instruction>(LastInsert))
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index dff27d06d8ed22..7422d287ff3e2a 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -187,3 +187,26 @@ define void @cant_vectorize_seeds(ptr %ptr) {
ret void
}
+define void @pack_vectors(ptr %ptr, ptr %ptr2) {
+; CHECK-LABEL: define void @pack_vectors(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr <2 x float>, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; CHECK-NEXT: [[LD0:%.*]] = load <2 x float>, ptr [[PTR0]], align 8
+; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
+; CHECK-NEXT: [[VPACK:%.*]] = extractelement <2 x float> [[LD0]], i32 0
+; CHECK-NEXT: [[VPACK1:%.*]] = insertelement <3 x float> poison, float [[VPACK]], i32 0
+; CHECK-NEXT: [[VPACK2:%.*]] = extractelement <2 x float> [[LD0]], i32 1
+; CHECK-NEXT: [[VPACK3:%.*]] = insertelement <3 x float> [[VPACK1]], float [[VPACK2]], i32 1
+; CHECK-NEXT: [[PACK:%.*]] = insertelement <3 x float> [[VPACK3]], float [[LD1]], i32 2
+; CHECK-NEXT: store <3 x float> [[PACK]], ptr [[PTR0]], align 8
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr <2 x float>, ptr %ptr, i32 0
+ %ptr1 = getelementptr float, ptr %ptr, i32 2
+ %ld0 = load <2 x float>, ptr %ptr0
+ %ld1 = load float, ptr %ptr2
+ store <2 x float> %ld0, ptr %ptr0
+ store float %ld1, ptr %ptr1
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/116447
More information about the llvm-commits mailing list