[llvm] 99e57f0 - [CodeGenPrepare] Increase the limit on the number of instructions to scan

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 30 06:38:53 PDT 2023


Author: Momchil Velikov
Date: 2023-03-30T14:38:22+01:00
New Revision: 99e57f06c4df56bc821cd6b6e646886e2e1ee851

URL: https://github.com/llvm/llvm-project/commit/99e57f06c4df56bc821cd6b6e646886e2e1ee851
DIFF: https://github.com/llvm/llvm-project/commit/99e57f06c4df56bc821cd6b6e646886e2e1ee851.diff

LOG: [CodeGenPrepare] Increase the limit on the number of instructions to scan

... when finding all memory uses for an address and make it a
parameter.

Now that we have avoided potentially exponential run time of
`FindAllMemoryUses` in D143893, it'd be beneficial to increase the
limit from 20.

Reviewed By: mkazantsev

Differential Revision: https://reviews.llvm.org/D143894

Change-Id: I3abdf40332ef65e9b2f819ac32ac60e4200ec51d

Added: 
    

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/test/CodeGen/Generic/addr-use-count.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index db3aa72c1be8d..bd00c9fac9295 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -264,6 +264,10 @@ static cl::opt<unsigned>
     HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
                             cl::desc("Least BB number of huge function."));
 
+static cl::opt<unsigned>
+    MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
+                          cl::Hidden,
+                          cl::desc("Max number of address users to look at"));
 namespace {
 
 enum ExtType {
@@ -4961,10 +4965,6 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
   return true;
 }
 
-// Max number of memory uses to look at before aborting the search to conserve
-// compile time.
-static constexpr int MaxMemoryUsesToScan = 20;
-
 /// Recursively walk all the uses of I until we find a memory use.
 /// If we find an obviously non-foldable instruction, return true.
 /// Add accessed addresses and types to MemoryUses.
@@ -4972,7 +4972,7 @@ static bool FindAllMemoryUses(
     Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
     const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
-    BlockFrequencyInfo *BFI, int &SeenInsts) {
+    BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I).second)
     return false;
@@ -4985,7 +4985,7 @@ static bool FindAllMemoryUses(
   for (Use &U : I->uses()) {
     // Conservatively return true if we're seeing a large number or a deep chain
     // of users. This avoids excessive compilation times in pathological cases.
-    if (SeenInsts++ >= MaxMemoryUsesToScan)
+    if (SeenInsts++ >= MaxAddressUsersToScan)
       return true;
 
     Instruction *UserI = cast<Instruction>(U.getUser());
@@ -5047,7 +5047,7 @@ static bool FindAllMemoryUses(
     Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
     const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
     ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
-  int SeenInsts = 0;
+  unsigned SeenInsts = 0;
   SmallPtrSet<Instruction *, 16> ConsideredInsts;
   return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
                            PSI, BFI, SeenInsts);

diff  --git a/llvm/test/CodeGen/Generic/addr-use-count.ll b/llvm/test/CodeGen/Generic/addr-use-count.ll
index 825d43259da54..00943b5a58e2b 100644
--- a/llvm/test/CodeGen/Generic/addr-use-count.ll
+++ b/llvm/test/CodeGen/Generic/addr-use-count.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -S -codegenprepare < %s | FileCheck %s
 ; REQUIRES: aarch64-registered-target
 
-; Test that `%addr` is not sunk, since the number of memory uses to scan exceeds the limit.
+; Test that `%addr` is sunk, after we've increased limit on the number of the memory uses to scan.
 
 target triple = "aarch64-linux"
 
@@ -11,30 +11,28 @@ declare void @g(...)
 define void @f(ptr %p) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 4
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[T0:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T2:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T6:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T7:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T8:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T10:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T11:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T12:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T13:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T14:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T15:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T16:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T17:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[T18:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[ADDR_1:%.*]] = getelementptr i8, ptr [[ADDR]], i32 4
-; CHECK-NEXT:    [[T19:%.*]] = load i32, ptr [[ADDR_1]], align 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[T0:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T2:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T6:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T7:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T8:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T10:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T11:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T12:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T13:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T14:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T15:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T16:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T17:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[T18:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[P]], i64 8
+; CHECK-NEXT:    [[T19:%.*]] = load i32, ptr [[SUNKADDR1]], align 4
 ; CHECK-NEXT:    call void @g(i32 [[T0]], i32 [[T1]], i32 [[T2]], i32 [[T3]], i32 [[T4]], i32 [[T5]], i32 [[T6]], i32 [[T7]], i32 [[T8]], i32 [[T9]], i32 [[T10]], i32 [[T11]], i32 [[T12]], i32 [[T13]], i32 [[T14]], i32 [[T15]], i32 [[T16]], i32 [[T17]], i32 [[T18]], i32 [[T19]])
 ; CHECK-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list