[llvm] [GVN] Permit load PRE to happen in more cases (PR #76063)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 20 07:22:52 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: David Sherwood (david-arm)

<details>
<summary>Changes</summary>

There is code in PerformLoadPRE that bails out if the number of
predecessors where we have to insert a load is greater than one:

  // If we need to insert new load in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available Load and insert a new load into
  // that one block.
  if (NumInsertPreds > 1)
      return false;

However, this is quite restrictive because NumInsertPreds is the sum

  unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();

where PredLoads and CriticalEdgePredSplit are the lists of available and
unavailable loads, respectively. Therefore, with just one unavailable
load we refuse to perform load PRE if there are other predecessors with
available loads. I've changed this check above to be less conservative:

  if (CriticalEdgePredSplit.size() > 1)
      return false;

As a result we can perform load PRE in many more cases. On the
AArch64 neoverse-v1 machine we see 1% gains in both SPEC2017 perlbench
and omnetpp benchmarks. On neoverse-n1 we see 1% and 3.5% gains in
perlbench and omnetpp, respectively. I only observed one regression with
exchange2, but this is related to function specialisation and will be
fixed separately.

---
Full diff: https://github.com/llvm/llvm-project/pull/76063.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/GVN.cpp (+1-1) 
- (modified) llvm/test/Transforms/GVN/PRE/load-pre-licm.ll (+97-1) 
- (modified) llvm/test/Transforms/GVN/condprop.ll (+12-8) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 5e58af0edc1556..02d7315115c4e5 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1625,7 +1625,7 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
   // FIXME: If we could restructure the CFG, we could make a common pred with
   // all the preds that don't have an available Load and insert a new load into
   // that one block.
-  if (NumInsertPreds > 1)
+  if (CriticalEdgePredSplit.size() > 1)
       return false;
 
   // Now we know where we will insert load. We must ensure that it is safe
diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
index c52f46b4f63ee9..9ec9568b3a5bce 100644
--- a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
+++ b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
@@ -130,11 +130,13 @@ define i32 @test3(i1 %cnd, ptr %p) {
 ; CHECK:       header:
 ; CHECK-NEXT:    br i1 [[CND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
+; CHECK-NEXT:    [[V1_PRE1:%.*]] = load i32, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[MERGE:%.*]]
 ; CHECK:       bb2:
+; CHECK-NEXT:    [[V1_PRE:%.*]] = load i32, ptr [[P]], align 4
 ; CHECK-NEXT:    br label [[MERGE]]
 ; CHECK:       merge:
-; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = phi i32 [ [[V1_PRE]], [[BB2]] ], [ [[V1_PRE1]], [[BB1]] ]
 ; CHECK-NEXT:    call void @hold(i32 [[V1]])
 ; CHECK-NEXT:    br label [[HEADER]]
 ;
@@ -282,3 +284,97 @@ header:
   call void @hold(i32 %v1)
   br label %header
 }
+
+
+; In block Z one load is unavailable (via entry block), whereas via other 2
+; predecessors the loads are available.
+define i8 @test7a(i1 %c1, ptr %p, i8 %i, i8 %j) {
+; CHECK-LABEL: @test7a(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[A:%.*]], label [[ENTRY_Z_CRIT_EDGE:%.*]]
+; CHECK:       entry.Z_crit_edge:
+; CHECK-NEXT:    [[PK_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P:%.*]], i8 [[I:%.*]]
+; CHECK-NEXT:    [[Z_PRE1:%.*]] = load i8, ptr [[PK_PHI_TRANS_INSERT]], align 1
+; CHECK-NEXT:    br label [[Z:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    [[PI:%.*]] = getelementptr i8, ptr [[P]], i8 [[I]]
+; CHECK-NEXT:    [[PJ:%.*]] = getelementptr i8, ptr [[P]], i8 [[J:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[PI]], align 1
+; CHECK-NEXT:    [[Y:%.*]] = load i8, ptr [[PJ]], align 1
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i8 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[C2]], label [[Z]], label [[B:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    br label [[Z]]
+; CHECK:       Z:
+; CHECK-NEXT:    [[Z:%.*]] = phi i8 [ [[Z_PRE1]], [[ENTRY_Z_CRIT_EDGE]] ], [ [[X]], [[A]] ], [ [[Y]], [[B]] ]
+; CHECK-NEXT:    [[K:%.*]] = phi i8 [ [[I]], [[ENTRY_Z_CRIT_EDGE]] ], [ [[I]], [[A]] ], [ [[J]], [[B]] ]
+; CHECK-NEXT:    [[PK:%.*]] = getelementptr i8, ptr [[P]], i8 [[K]]
+; CHECK-NEXT:    ret i8 [[Z]]
+;
+
+entry:
+  br i1 %c1, label %A, label %Z
+
+A:
+  %pi = getelementptr i8, ptr %p, i8 %i
+  %pj = getelementptr i8, ptr %p, i8 %j
+  %x = load i8, ptr %pi
+  %y = load i8, ptr %pj
+  %c2 = icmp slt i8 %x, %y
+  br i1 %c2, label %Z, label %B
+
+B:
+  br label %Z
+
+Z:
+  %k = phi i8 [ %i, %entry ], [%i, %A], [ %j, %B ]
+  %pk = getelementptr i8, ptr %p, i8 %k
+  %z = load i8, ptr %pk
+  ret i8 %z
+}
+
+
+
+; In block Z two loads are unavailable (via entry and B blocks), whereas via
+; other predecessor (A) the load is available.
+define i8 @test7b(i1 %c1, ptr %p, i8 %i, i8 %j) {
+; CHECK-LABEL: @test7b(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[A:%.*]], label [[ENTRY_Z_CRIT_EDGE:%.*]]
+; CHECK:       entry.Z_crit_edge:
+; CHECK-NEXT:    [[PK_PHI_TRANS_INSERT1:%.*]] = getelementptr i8, ptr [[P:%.*]], i8 [[I:%.*]]
+; CHECK-NEXT:    [[Z_PRE2:%.*]] = load i8, ptr [[PK_PHI_TRANS_INSERT1]], align 1
+; CHECK-NEXT:    br label [[Z:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    [[PI:%.*]] = getelementptr i8, ptr [[P]], i8 [[I]]
+; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[PI]], align 1
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i8 [[X]], 3
+; CHECK-NEXT:    br i1 [[C2]], label [[Z]], label [[B:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    [[PK_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i8 [[J:%.*]]
+; CHECK-NEXT:    [[Z_PRE:%.*]] = load i8, ptr [[PK_PHI_TRANS_INSERT]], align 1
+; CHECK-NEXT:    br label [[Z]]
+; CHECK:       Z:
+; CHECK-NEXT:    [[Z:%.*]] = phi i8 [ [[Z_PRE2]], [[ENTRY_Z_CRIT_EDGE]] ], [ [[X]], [[A]] ], [ [[Z_PRE]], [[B]] ]
+; CHECK-NEXT:    [[K:%.*]] = phi i8 [ [[I]], [[ENTRY_Z_CRIT_EDGE]] ], [ [[I]], [[A]] ], [ [[J]], [[B]] ]
+; CHECK-NEXT:    [[PK:%.*]] = getelementptr i8, ptr [[P]], i8 [[K]]
+; CHECK-NEXT:    ret i8 [[Z]]
+;
+entry:
+  br i1 %c1, label %A, label %Z
+
+A:
+  %pi = getelementptr i8, ptr %p, i8 %i
+  %x = load i8, ptr %pi
+  %c2 = icmp slt i8 %x, 3
+  br i1 %c2, label %Z, label %B
+
+B:
+  br label %Z
+
+Z:
+  %k = phi i8 [ %i, %entry ], [%i, %A], [ %j, %B ]
+  %pk = getelementptr i8, ptr %p, i8 %k
+  %z = load i8, ptr %pk
+  ret i8 %z
+}
diff --git a/llvm/test/Transforms/GVN/condprop.ll b/llvm/test/Transforms/GVN/condprop.ll
index 6b1e4d10601099..f93c50201942bf 100644
--- a/llvm/test/Transforms/GVN/condprop.ll
+++ b/llvm/test/Transforms/GVN/condprop.ll
@@ -214,11 +214,11 @@ define void @test4(i1 %b, i32 %x) {
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]]
 ; CHECK:       sw:
 ; CHECK-NEXT:    switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[CASE0:%.*]]
-; CHECK-NEXT:    i32 1, label [[CASE1:%.*]]
-; CHECK-NEXT:    i32 2, label [[CASE0]]
-; CHECK-NEXT:    i32 3, label [[CASE3]]
-; CHECK-NEXT:    i32 4, label [[DEFAULT]]
+; CHECK-NEXT:      i32 0, label [[CASE0:%.*]]
+; CHECK-NEXT:      i32 1, label [[CASE1:%.*]]
+; CHECK-NEXT:      i32 2, label [[CASE0]]
+; CHECK-NEXT:      i32 3, label [[CASE3]]
+; CHECK-NEXT:      i32 4, label [[DEFAULT]]
 ; CHECK-NEXT:    ]
 ; CHECK:       default:
 ; CHECK-NEXT:    call void @bar(i32 [[X]])
@@ -570,12 +570,16 @@ define void @test14(ptr %ptr1, ptr noalias %ptr2) {
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP2]], [[PTR2:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_END]], label [[IF2:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN_LOOP_END_CRIT_EDGE:%.*]], label [[IF2:%.*]]
+; CHECK:       then.loop.end_crit_edge:
+; CHECK-NEXT:    [[VAL3_PRE2:%.*]] = load i32, ptr [[PTR2]], align 4
+; CHECK-NEXT:    br label [[LOOP_END]]
 ; CHECK:       if2:
+; CHECK-NEXT:    [[VAL3_PRE:%.*]] = load i32, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    br label [[LOOP_END]]
 ; CHECK:       loop.end:
-; CHECK-NEXT:    [[PHI3:%.*]] = phi ptr [ [[PTR2]], [[THEN]] ], [ [[PTR1]], [[IF2]] ]
-; CHECK-NEXT:    [[VAL3]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[VAL3]] = phi i32 [ [[VAL3_PRE2]], [[THEN_LOOP_END_CRIT_EDGE]] ], [ [[VAL3_PRE]], [[IF2]] ]
+; CHECK-NEXT:    [[PHI3:%.*]] = phi ptr [ [[PTR2]], [[THEN_LOOP_END_CRIT_EDGE]] ], [ [[PTR1]], [[IF2]] ]
 ; CHECK-NEXT:    store i32 [[VAL3]], ptr [[PHI3]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LOOP]], label [[IF1]]
 ;

``````````

</details>


https://github.com/llvm/llvm-project/pull/76063


More information about the llvm-commits mailing list