[llvm] [LICM] Hoisting writeonly calls (PR #143799)

Jiachen Wang via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 13 10:58:00 PDT 2025


https://github.com/WanderingAura updated https://github.com/llvm/llvm-project/pull/143799

From 64e7d5e1f239b19931d90abaffcd5ff265126179 Mon Sep 17 00:00:00 2001
From: Jiachen Wang <jiachen.wang2000 at gmail.com>
Date: Wed, 11 Jun 2025 21:44:28 +0100
Subject: [PATCH] Hoist loop invariant writeonly calls

---
 llvm/lib/Transforms/Scalar/LICM.cpp           | 136 +++++++++++-------
 .../Transforms/LICM/call-hoisting-temp.ll     |  26 ++++
 llvm/test/Transforms/LICM/call-hoisting.ll    | 105 +++++++++++++-
 3 files changed, 210 insertions(+), 57 deletions(-)
 create mode 100644 llvm/test/Transforms/LICM/call-hoisting-temp.ll

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bd59caa6a959a..5376f4af2c9bc 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -186,6 +186,9 @@ static bool isSafeToExecuteUnconditionally(
     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
     AssumptionCache *AC, bool AllowSpeculation);
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+                                    AAResults *AA, Loop *CurLoop,
+                                    SinkAndHoistLICMFlags &Flags);
 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                      Loop *CurLoop, Instruction &I,
                                      SinkAndHoistLICMFlags &Flags,
@@ -1258,8 +1261,16 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
         return true;
     }
 
-    // FIXME: This should use mod/ref information to see if we can hoist or
-    // sink the call.
+    if (Behavior.onlyWritesMemory()) {
+      // If the call is the only memory access in the loop, then there is
+      // nothing stopping us from hoisting it.
+      if (isOnlyMemoryAccess(CI, CurLoop, MSSAU))
+        return true;
+
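+      // Otherwise, if the call only writes to locations pointed to by its
+      // arguments, we can move it as long as no other access in the loop
+      // may read or write those locations.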
+      if (Behavior.onlyAccessesArgPointees())
+        return noConflictingReadWrites(CI, MSSA, AA, CurLoop, Flags);
+    }
 
     return false;
   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
@@ -1277,57 +1288,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
     // arbitrary number of reads in the loop.
     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
       return true;
-    // If there are more accesses than the Promotion cap, then give up as we're
-    // not walking a list that long.
-    if (Flags.tooManyMemoryAccesses())
-      return false;
-
-    auto *SIMD = MSSA->getMemoryAccess(SI);
-    BatchAAResults BAA(*AA);
-    auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
-    // Make sure there are no clobbers inside the loop.
-    if (!MSSA->isLiveOnEntryDef(Source) &&
-           CurLoop->contains(Source->getBlock()))
-      return false;
-
-    // If there are interfering Uses (i.e. their defining access is in the
-    // loop), or ordered loads (stored as Defs!), don't move this store.
-    // Could do better here, but this is conservatively correct.
-    // TODO: Cache set of Uses on the first walk in runOnLoop, update when
-    // moving accesses. Can also extend to dominating uses.
-    for (auto *BB : CurLoop->getBlocks())
-      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
-        for (const auto &MA : *Accesses)
-          if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
-            auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
-                const_cast<MemoryUse *>(MU));
-            if (!MSSA->isLiveOnEntryDef(MD) &&
-                CurLoop->contains(MD->getBlock()))
-              return false;
-            // Disable hoisting past potentially interfering loads. Optimized
-            // Uses may point to an access outside the loop, as getClobbering
-            // checks the previous iteration when walking the backedge.
-            // FIXME: More precise: no Uses that alias SI.
-            if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
-              return false;
-          } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
-            if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
-              (void)LI; // Silence warning.
-              assert(!LI->isUnordered() && "Expected unordered load");
-              return false;
-            }
-            // Any call, while it may not be clobbering SI, it may be a use.
-            if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
-              // Check if the call may read from the memory location written
-              // to by SI. Check CI's attributes and arguments; the number of
-              // such checks performed is limited above by NoOfMemAccTooLarge.
-              ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
-              if (isModOrRefSet(MRI))
-                return false;
-            }
-          }
-      }
-    return true;
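+    // Otherwise, make sure no read or write in the loop conflicts with the
+    // location this store writes to; the check is shared with writeonly
+    // calls.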
+    return noConflictingReadWrites(SI, MSSA, AA, CurLoop, Flags);
   }
 
   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -2354,6 +2315,75 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   return Result;
 }
 
+// For a given store or writeonly call instruction, check that no other
+// reads or writes inside the loop conflict with the memory locations that
+// the instruction may write to.
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+                                    AAResults *AA, Loop *CurLoop,
+                                    SinkAndHoistLICMFlags &Flags) {
+  assert((isa<CallInst>(I) || isa<StoreInst>(I)) &&
+         "Expected a store or a writeonly call");
+  // If there are more accesses than the Promotion cap, then give up as we're
+  // not walking a list that long.
+  if (Flags.tooManyMemoryAccesses())
+    return false;
+
+  auto *IMD = MSSA->getMemoryAccess(I);
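+  // Batch the alias queries below so that repeated mod/ref checks can reuse
+  // cached results.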
+  BatchAAResults BAA(*AA);
+  auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
+  // Make sure there are no clobbers inside the loop.
+  if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
+    return false;
+
+  // If there are interfering Uses (i.e. their defining access is in the
+  // loop), or ordered loads (stored as Defs!), don't move this instruction.
+  // Could do better here, but this is conservatively correct.
+  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+  // moving accesses. Can also extend to dominating uses.
+  for (auto *BB : CurLoop->getBlocks())
+    if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+      for (const auto &MA : *Accesses)
+        if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+          auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
+                                               const_cast<MemoryUse *>(MU));
+          if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
+            return false;
+          // Disable hoisting past potentially interfering loads. Optimized
+          // Uses may point to an access outside the loop, as getClobbering
+          // checks the previous iteration when walking the backedge.
+          // FIXME: More precise: no Uses that alias I.
+          if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
+            return false;
+        } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
+          if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+            (void)LI; // Silence warning.
+            assert(!LI->isUnordered() && "Expected an ordered load");
+            return false;
+          }
+          // Any call, while it may not be clobbering I, may still be a use.
+          if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+            // Check if the call may read from the memory location written
+            // to by I. Check CI's attributes and arguments; the number of
+            // such checks performed is limited above by NoOfMemAccTooLarge.
+            if (auto *SI = dyn_cast<StoreInst>(I)) {
+              ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
+              if (isModOrRefSet(MRI))
+                return false;
+            } else if (auto *SCI = dyn_cast<CallInst>(I)) {
+              // If the instruction we want to hoist is itself a call, there
+              // is no need to check mod/ref info against itself.
+              if (SCI == CI)
+                continue;
+              ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
+              if (isModOrRefSet(MRI))
+                return false;
+            }
+          }
+        }
+    }
+  return true;
+}
+
 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                      Loop *CurLoop, Instruction &I,
                                      SinkAndHoistLICMFlags &Flags,
diff --git a/llvm/test/Transforms/LICM/call-hoisting-temp.ll b/llvm/test/Transforms/LICM/call-hoisting-temp.ll
new file mode 100644
index 0000000000000..bb01e57e64c57
--- /dev/null
+++ b/llvm/test/Transforms/LICM/call-hoisting-temp.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -passes=licm %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<target-ir>,require<scalar-evolution>,require<opt-remark-emit>,loop-mssa(licm)' < %s -S | FileCheck %s
+
+
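+; @store is argmemonly and writeonly: it only writes memory, and only
+; through its pointer argument.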
+declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind
+
+; both calls are hoisted because the pointers do not alias
+define void @two_pointer_noalias(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: @two_pointer_noalias
+; CHECK-LABEL: entry:
+; CHECK: call void @store
+; CHECK-LABEL: loop:
+; CHECK-LABEL: exit:
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LICM/call-hoisting.ll b/llvm/test/Transforms/LICM/call-hoisting.ll
index 907f13438623a..331efaf30d374 100644
--- a/llvm/test/Transforms/LICM/call-hoisting.ll
+++ b/llvm/test/Transforms/LICM/call-hoisting.ll
@@ -86,14 +86,16 @@ exit:
 
 declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind
 
+; loop-invariant calls to writeonly functions such as the one above
+; should be hoisted
 define void @test(ptr %loc) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -118,10 +120,10 @@ define void @test_multiexit(ptr %loc, i1 %earlycnd) {
 ; CHECK-LABEL: define void @test_multiexit(
 ; CHECK-SAME: ptr [[LOC:%.*]], i1 [[EARLYCND:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
-; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    br i1 [[EARLYCND]], label %[[EXIT1:.*]], label %[[BACKEDGE]]
 ; CHECK:       [[BACKEDGE]]:
@@ -151,6 +153,97 @@ exit2:
   ret void
 }
 
+; cannot be hoisted because the two pointers may alias one another
+define void @neg_two_pointer(ptr %loc, ptr %otherloc) {
+; CHECK-LABEL: define void @neg_two_pointer(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[OTHERLOC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT:    call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; both calls are hoisted because the pointers do not alias
+define void @two_pointer_noalias(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @two_pointer_noalias(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT:    call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; when there is a conflicting read in the loop, the store call should not
+; be hoisted
+define void @neg_conflicting_read(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @neg_conflicting_read(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    call void @load(i32 0, ptr [[LOC]])
+; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @store(i32 0, ptr %loc)
+  br label %loop
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @load(i32 0, ptr %loc)
+  call void @store(i32 0, ptr %loc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
 define void @neg_lv_value(ptr %loc) {
 ; CHECK-LABEL: define void @neg_lv_value(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
@@ -374,14 +467,17 @@ exit:
   ret void
 }
 
-define void @neg_not_argmemonly(ptr %loc) {
+; when the call is not argmemonly and it is not the only memory access,
+; we do not hoist it
+define void @neg_not_argmemonly(ptr %loc, ptr %loc2) {
 ; CHECK-LABEL: define void @neg_not_argmemonly(
-; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    call void @not_argmemonly(i32 0, ptr [[LOC]])
+; CHECK-NEXT:    call void @load(i32 0, ptr [[LOC2]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -394,6 +490,7 @@ entry:
 loop:
   %iv = phi i32 [0, %entry], [%iv.next, %loop]
   call void @not_argmemonly(i32 0, ptr %loc)
+  call void @load(i32 0, ptr %loc2)
   %iv.next = add i32 %iv, 1
   %cmp = icmp slt i32 %iv, 200
   br i1 %cmp, label %loop, label %exit


