[llvm] 689ace5 - [MemCpyOptimizer] Support scalable vectors in performStackMoveOptzn (#67632)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 28 12:25:42 PDT 2023
Author: Craig Topper
Date: 2023-09-28T12:25:38-07:00
New Revision: 689ace53a5caa79e32a49b679e6fc3dce4f495b0
URL: https://github.com/llvm/llvm-project/commit/689ace53a5caa79e32a49b679e6fc3dce4f495b0
DIFF: https://github.com/llvm/llvm-project/commit/689ace53a5caa79e32a49b679e6fc3dce4f495b0.diff
LOG: [MemCpyOptimizer] Support scalable vectors in performStackMoveOptzn (#67632)
This changes performStackMoveOptzn to take a TypeSize instead of a
uint64_t, avoiding an implicit conversion when it is called from
processStoreOfLoad. The rest of performStackMoveOptzn has been updated
to allow scalable types as well.
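To make the motivation concrete, here is a minimal sketch (not part of
the patch; sizesMatch is a hypothetical helper) of the comparison
semantics the TypeSize signature relies on:

    #include "llvm/Support/TypeSize.h"
    using namespace llvm;

    // TypeSize equality compares both the known minimum byte count and
    // the scalable flag, so a fixed 16-byte copy never matches a
    // scalable (vscale x 16)-byte alloca. Nothing is implicitly
    // converted to uint64_t, a conversion that would assert for a
    // scalable size.
    static bool sizesMatch(TypeSize CopySize, TypeSize AllocaSize) {
      return CopySize == AllocaSize;
    }

This is why the explicit isScalable()/getFixedValue() checks in the old
code can be replaced by direct comparisons against *SrcSize and
*DestSize in the hunks below.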
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Transforms/MemCpyOpt/stack-move.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 3e8a5bf6a5bd56e..6c809bc881d050d 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -83,7 +83,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
- uint64_t Size, BatchAAResults &BAA);
+ TypeSize Size, BatchAAResults &BAA);
void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e818f6d4e84627d..783ef57805610b9 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1432,7 +1432,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// allocas that aren't captured.
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
AllocaInst *DestAlloca,
- AllocaInst *SrcAlloca, uint64_t Size,
+ AllocaInst *SrcAlloca, TypeSize Size,
BatchAAResults &BAA) {
LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
<< *Store << "\n");
@@ -1446,13 +1446,12 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
// Check that copy is full with static size.
const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
- if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
+ if (!SrcSize || Size != *SrcSize) {
LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
return false;
}
std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
- if (!DestSize || DestSize->isScalable() ||
- Size != DestSize->getFixedValue()) {
+ if (!DestSize || Size != *DestSize) {
LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
return false;
}
@@ -1770,8 +1769,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
if (Len == nullptr)
return false;
- if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
- BAA)) {
+ if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
+ TypeSize::getFixed(Len->getZExtValue()), BAA)) {
// Avoid invalidating the iterator.
BBI = M->getNextNonDebugInstruction()->getIterator();
eraseInstruction(M);
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index dee630f470d0053..6089c0a4d7cf507 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -113,6 +113,33 @@ define void @load_store() {
ret void
}
+; Test scalable vectors.
+define void @load_store_scalable(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: define void @load_store_scalable
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[SRC:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT: store <vscale x 4 x i32> [[X]], ptr [[SRC]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca <vscale x 4 x i32>
+ %dest = alloca <vscale x 4 x i32>
+ call void @llvm.lifetime.start.p0(i64 -1, ptr nocapture %src)
+ call void @llvm.lifetime.start.p0(i64 -1, ptr nocapture %dest)
+ store <vscale x 4 x i32> %x, ptr %src
+ %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+ %src.val = load <vscale x 4 x i32>, ptr %src
+ store <vscale x 4 x i32> %src.val, ptr %dest
+
+ %2 = call i32 @use_nocapture(ptr nocapture %dest)
+
+ call void @llvm.lifetime.end.p0(i64 -1, ptr nocapture %src)
+ call void @llvm.lifetime.end.p0(i64 -1, ptr nocapture %dest)
+ ret void
+}
+
; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid.
define void @align_up() {
; CHECK-LABEL: define void @align_up() {
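For local verification, the updated test can be exercised in the usual
way; this is a sketch assuming a built LLVM tree (the authoritative RUN
line is at the top of stack-move.ll):

    build/bin/llvm-lit llvm/test/Transforms/MemCpyOpt/stack-move.ll

or, to inspect the transformed IR directly:

    build/bin/opt -passes=memcpyopt -S llvm/test/Transforms/MemCpyOpt/stack-move.ll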