[llvm] 5344d8e - [CodeGenPrepare] Estimate liveness of loop invariants when checking for address folding profitability

Mon Apr 24 02:26:01 PDT 2023

Author: Momchil Velikov
Date: 2023-04-24T10:21:36+01:00
New Revision: 5344d8e10bb7d8672d4bfae8adb010465470d51b

URL: https://github.com/llvm/llvm-project/commit/5344d8e10bb7d8672d4bfae8adb010465470d51b
DIFF: https://github.com/llvm/llvm-project/commit/5344d8e10bb7d8672d4bfae8adb010465470d51b.diff

LOG: [CodeGenPrepare] Estimate liveness of loop invariants when checking for address folding profitability

When checking the profitability of folding an address computation
into a memory instruction, the compiler tries to determine the liveness
of the values comprising the address at the point of the memory instruction.
This patch improves on the live variable estimates by including
the loop invariants which are referenced in the loop body.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D143897

Added: 
    llvm/test/CodeGen/AArch64/gep-sink-loop-inv-live.ll

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c7831827d11e8..96fe94f8e02a1 100644

--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -268,6 +268,10 @@ static cl::opt<unsigned>
     MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
                           cl::Hidden,
                           cl::desc("Max number of address users to look at"));
+
+static cl::opt<unsigned> MaxLoopInvUsersToScan(
+    "cgp-max-loop-inv-users-to-scan", cl::init(20), cl::Hidden,
+    cl::desc("Max number of loop invariant users to look at"));
 namespace {
 
 enum ExtType {
@@ -5060,6 +5064,19 @@ static bool FindAllMemoryUses(
                            PSI, BFI, SeenInsts);
 }
 
+static bool isUsedInLoop(const Value *V, const Loop *L) {
+  unsigned N = 0;
+
+  for (const Use &U : V->uses()) {
+    if (++N > MaxLoopInvUsersToScan)
+      break;
+    const Instruction *UserI = cast<Instruction>(U.getUser());
+    if (L->contains(UserI->getParent()))
+      return true;
+  }
+
+  return false;
+}
 
 /// Return true if Val is already known to be live at the use site that we're
 /// folding it into. If so, there is no cost to include it in the addressing
@@ -5083,10 +5100,17 @@ bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
     if (AI->isStaticAlloca())
       return true;
 
+  // If the value is loop invariant and is used in the loop which contains the
+  // memory instruction, it's live.
+  BasicBlock *BB = MemoryInst->getParent();
+  if (Loop *L = LI.getLoopFor(BB);
+      L && L->isLoopInvariant(Val) && isUsedInLoop(Val, L))
+    return true;
+
   // Check to see if this value is already used in the memory instruction's
   // block.  If so, it's already live into the block at the very least, so we
   // can reasonably fold it.
-  return Val->isUsedInBasicBlock(MemoryInst->getParent());
+  return Val->isUsedInBasicBlock(BB);
 }
 
 /// It is possible for the addressing mode of the machine to fold the specified

diff  --git a/llvm/test/CodeGen/AArch64/gep-sink-loop-inv-live.ll b/llvm/test/CodeGen/AArch64/gep-sink-loop-inv-live.ll
new file mode 100644
index 0000000000000..8d7a38028985a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gep-sink-loop-inv-live.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+declare void @use(...)
+declare i64 @next(i64)
+
+define void @f(ptr %a, i64 %k, i64 %n, ptr %q) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[COND:%.*]]
+; CHECK:       cond:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[J:%.*]], [[IF_THEN:%.*]] ], [ [[I]], [[IF_ELSE:%.*]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp ult i64 [[I]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[C0]], label [[LOOP:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[J]] = call i64 @next(i64 [[I]])
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[I]], 4
+; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUNKADDR]]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[SUNKADDR1]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN]], label [[IF_ELSE]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store ptr [[P]], ptr [[Q:%.*]], align 8
+; CHECK-NEXT:    br label [[COND]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @use(ptr [[A]])
+; CHECK-NEXT:    br label [[COND]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %cond
+
+cond:
+  %i = phi i64 [0, %entry], [%i.next, %next]
+  %p = getelementptr i32, ptr %a, i64 %i
+  %c0 = icmp ult i64 %i, %n
+  br i1 %c0, label %loop, label %exit
+
+loop:
+  %j = call i64 @next(i64 %i)
+  %v = load i32, ptr %p
+  %c1 = icmp slt i32 %v, 0
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  store ptr %p, ptr %q
+  br label %next
+
+if.else:
+  call void @use(ptr %a)
+  br label %next
+
+next:
+  %i.next = phi i64 [%j, %if.then], [%i, %if.else]
+  br label %cond
+
+exit:
+  ret void
+}