[llvm] [LICM] Hoisting writeonly calls (PR #143799)
Jiachen Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 15 09:57:45 PDT 2025
https://github.com/WanderingAura updated https://github.com/llvm/llvm-project/pull/143799
From 83b0d30386f14cf4c3e6acccdb7dca8c04078add Mon Sep 17 00:00:00 2001
From: Jiachen Wang <jiachen.wang2000 at gmail.com>
Date: Fri, 13 Jun 2025 18:59:42 +0100
Subject: [PATCH] Hoist writeonly calls
---
llvm/lib/Transforms/Scalar/LICM.cpp | 136 +++++++++++++--------
llvm/test/Transforms/LICM/call-hoisting.ll | 105 +++++++++++++++-
2 files changed, 184 insertions(+), 57 deletions(-)
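Summary: canSinkOrHoistInst previously gave up on any call that writes to
memory (the old FIXME below). With this patch, a call that only writes
memory can be hoisted when it is the only memory access in the loop, or
when it is argmemonly and a MemorySSA walk finds no conflicting reads or
writes inside the loop. As a minimal sketch of the newly hoistable case
(hand-written here, mirroring the @store declaration in the updated test
rather than copied from it):

  declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind

  define void @hoistable(ptr %loc) {
  entry:
    br label %loop
  loop:
    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
    ; Writeonly, argmemonly, loop-invariant operands, and no reads of %loc
    ; in the loop, so LICM can now move this call to the preheader.
    call void @store(i32 0, ptr %loc)
    %iv.next = add i32 %iv, 1
    %cmp = icmp slt i32 %iv, 200
    br i1 %cmp, label %loop, label %exit
  exit:
    ret void
  }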
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bd59caa6a959a..5376f4af2c9bc 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -186,6 +186,9 @@ static bool isSafeToExecuteUnconditionally(
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
AssumptionCache *AC, bool AllowSpeculation);
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+ AAResults *AA, Loop *CurLoop,
+ SinkAndHoistLICMFlags &Flags);
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags,
@@ -1258,8 +1261,16 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
return true;
}
- // FIXME: This should use mod/ref information to see if we can hoist or
- // sink the call.
+ if (Behavior.onlyWritesMemory()) {
+    // If the call is the only memory access in the loop, there is
+    // nothing stopping us from hoisting it.
+ if (isOnlyMemoryAccess(CI, CurLoop, MSSAU))
+ return true;
+
+ if (Behavior.onlyAccessesArgPointees()) {
+ return noConflictingReadWrites(CI, MSSA, AA, CurLoop, Flags);
+ }
+ }
return false;
} else if (auto *FI = dyn_cast<FenceInst>(&I)) {
@@ -1277,57 +1288,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// arbitrary number of reads in the loop.
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- // If there are more accesses than the Promotion cap, then give up as we're
- // not walking a list that long.
- if (Flags.tooManyMemoryAccesses())
- return false;
-
- auto *SIMD = MSSA->getMemoryAccess(SI);
- BatchAAResults BAA(*AA);
- auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
- // Make sure there are no clobbers inside the loop.
- if (!MSSA->isLiveOnEntryDef(Source) &&
- CurLoop->contains(Source->getBlock()))
- return false;
-
- // If there are interfering Uses (i.e. their defining access is in the
- // loop), or ordered loads (stored as Defs!), don't move this store.
- // Could do better here, but this is conservatively correct.
- // TODO: Cache set of Uses on the first walk in runOnLoop, update when
- // moving accesses. Can also extend to dominating uses.
- for (auto *BB : CurLoop->getBlocks())
- if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
- for (const auto &MA : *Accesses)
- if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
- auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
- const_cast<MemoryUse *>(MU));
- if (!MSSA->isLiveOnEntryDef(MD) &&
- CurLoop->contains(MD->getBlock()))
- return false;
- // Disable hoisting past potentially interfering loads. Optimized
- // Uses may point to an access outside the loop, as getClobbering
- // checks the previous iteration when walking the backedge.
- // FIXME: More precise: no Uses that alias SI.
- if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
- return false;
- } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
- if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
- (void)LI; // Silence warning.
- assert(!LI->isUnordered() && "Expected unordered load");
- return false;
- }
- // Any call, while it may not be clobbering SI, it may be a use.
- if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
- // Check if the call may read from the memory location written
- // to by SI. Check CI's attributes and arguments; the number of
- // such checks performed is limited above by NoOfMemAccTooLarge.
- ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
- if (isModOrRefSet(MRI))
- return false;
- }
- }
- }
- return true;
+ return noConflictingReadWrites(SI, MSSA, AA, CurLoop, Flags);
}
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -2354,6 +2315,75 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}
+// For a given store or writeonly call instruction, check that there are no
+// reads or writes in the loop that conflict with the instruction's memory
+// access.
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+ AAResults *AA, Loop *CurLoop,
+ SinkAndHoistLICMFlags &Flags) {
+  assert((isa<CallInst>(I) || isa<StoreInst>(I)) && "Expected store or call");
+ // If there are more accesses than the Promotion cap, then give up as we're
+ // not walking a list that long.
+ if (Flags.tooManyMemoryAccesses()) {
+ return false;
+ }
+
+ auto *IMD = MSSA->getMemoryAccess(I);
+ BatchAAResults BAA(*AA);
+ auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
+ // Make sure there are no clobbers inside the loop.
+ if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
+ return false;
+
+  // If there are interfering Uses (i.e. their defining access is in the
+  // loop), or ordered loads (stored as Defs!), don't move this access.
+ // Could do better here, but this is conservatively correct.
+ // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+ // moving accesses. Can also extend to dominating uses.
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses)
+ if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
+ const_cast<MemoryUse *>(MU));
+ if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
+ return false;
+ // Disable hoisting past potentially interfering loads. Optimized
+ // Uses may point to an access outside the loop, as getClobbering
+ // checks the previous iteration when walking the backedge.
+ // FIXME: More precise: no Uses that alias I.
+ if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
+ return false;
+ } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
+ if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+ (void)LI; // Silence warning.
+ assert(!LI->isUnordered() && "Expected unordered load");
+ return false;
+ }
+          // Any call, while it may not be clobbering I, may still be a use.
+ if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+ // Check if the call may read from the memory location written
+ // to by I. Check CI's attributes and arguments; the number of
+ // such checks performed is limited above by NoOfMemAccTooLarge.
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
+ if (isModOrRefSet(MRI))
+ return false;
+          } else if (auto *SCI = dyn_cast<CallInst>(I)) {
+            // If the instruction we want to hoist is itself a call, there
+            // is no need to check mod/ref info against itself.
+ if (SCI == CI)
+ continue;
+ ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
+ if (isModOrRefSet(MRI))
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags,
diff --git a/llvm/test/Transforms/LICM/call-hoisting.ll b/llvm/test/Transforms/LICM/call-hoisting.ll
index 907f13438623a..331efaf30d374 100644
--- a/llvm/test/Transforms/LICM/call-hoisting.ll
+++ b/llvm/test/Transforms/LICM/call-hoisting.ll
@@ -86,14 +86,16 @@ exit:
declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind
+; Loop-invariant calls to writeonly functions such as the one above
+; should be hoisted.
define void @test(ptr %loc) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[LOC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -118,10 +120,10 @@ define void @test_multiexit(ptr %loc, i1 %earlycnd) {
; CHECK-LABEL: define void @test_multiexit(
; CHECK-SAME: ptr [[LOC:%.*]], i1 [[EARLYCND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
-; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[EARLYCND]], label %[[EXIT1:.*]], label %[[BACKEDGE]]
; CHECK: [[BACKEDGE]]:
@@ -151,6 +153,97 @@ exit2:
ret void
}
+; The calls cannot be hoisted because the two pointers may alias one another.
+define void @neg_two_pointer(ptr %loc, ptr %otherloc) {
+; CHECK-LABEL: define void @neg_two_pointer(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %loop]
+ call void @store(i32 0, ptr %loc)
+ call void @store(i32 1, ptr %otherloc)
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv, 200
+ br i1 %cmp, label %loop, label %exit
+exit:
+ ret void
+}
+
+; The calls are hoisted because the two pointers do not alias.
+define void @two_pointer_noalias(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @two_pointer_noalias(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %loop]
+ call void @store(i32 0, ptr %loc)
+ call void @store(i32 1, ptr %otherloc)
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv, 200
+ br i1 %cmp, label %loop, label %exit
+exit:
+ ret void
+}
+
+; When there is a conflicting read, the store call should not be hoisted.
+define void @neg_conflicting_read(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @neg_conflicting_read(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @load(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @store(i32 0, ptr %loc)
+ br label %loop
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %loop]
+ call void @load(i32 0, ptr %loc)
+ call void @store(i32 0, ptr %loc)
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv, 200
+ br i1 %cmp, label %loop, label %exit
+exit:
+ ret void
+}
+
define void @neg_lv_value(ptr %loc) {
; CHECK-LABEL: define void @neg_lv_value(
; CHECK-SAME: ptr [[LOC:%.*]]) {
@@ -374,14 +467,17 @@ exit:
ret void
}
-define void @neg_not_argmemonly(ptr %loc) {
+; When the call is not argmemonly and it is not the only memory access,
+; we do not hoist it.
+define void @neg_not_argmemonly(ptr %loc, ptr %loc2) {
; CHECK-LABEL: define void @neg_not_argmemonly(
-; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @load(i32 0, ptr [[LOC2]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -394,6 +490,7 @@ entry:
loop:
%iv = phi i32 [0, %entry], [%iv.next, %loop]
call void @not_argmemonly(i32 0, ptr %loc)
+ call void @load(i32 0, ptr %loc2)
%iv.next = add i32 %iv, 1
%cmp = icmp slt i32 %iv, 200
br i1 %cmp, label %loop, label %exit
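To reproduce locally, the updated test can be piped through opt and
FileCheck (assuming the file's existing RUN line invokes LICM in the
usual way), for example:

  opt -S -passes=licm llvm/test/Transforms/LICM/call-hoisting.ll \
    | FileCheck llvm/test/Transforms/LICM/call-hoisting.ll

In the positive tests the hoisted @store calls should now be matched in
the entry block rather than inside the loop.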