[llvm] [LoopRotate] Set loop back edge weight to not less than exit weight (PR #86496)

Haohai Wen via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 28 00:10:03 PDT 2024


https://github.com/HaohaiWen updated https://github.com/llvm/llvm-project/pull/86496

>From 31d4ce6310fedb2bcac53fe17f59be4dd4a469e9 Mon Sep 17 00:00:00 2001
From: Haohai Wen <haohai.wen at intel.com>
Date: Mon, 25 Mar 2024 17:20:22 +0800
Subject: [PATCH 1/3] [LoopRotate] Add test to track update for inaccurate
 branch weight

Branch weight from sample-based PGO may be inaccurate due to
sampling. This test tracks such a case, where updateBranchWeights
wraps around on unsigned subtraction.
---
 .../LoopRotate/update-branch-weights.ll       | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/llvm/test/Transforms/LoopRotate/update-branch-weights.ll b/llvm/test/Transforms/LoopRotate/update-branch-weights.ll
index 5d742b64e0adbf..acb2038d17bb83 100644
--- a/llvm/test/Transforms/LoopRotate/update-branch-weights.ll
+++ b/llvm/test/Transforms/LoopRotate/update-branch-weights.ll
@@ -232,6 +232,46 @@ loop_exit:
   ret void
 }
 
+; BFI_BEFORE-LABEL: block-frequency-info: func6_inaccurate_branch_weight
+; BFI_BEFORE: - entry: {{.*}} count = 1024
+; BFI_BEFORE: - loop_header: {{.*}} count = 2047
+; BFI_BEFORE: - loop_body: {{.*}} count = 1023
+; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
+
+; BFI_AFTER-LABEL: block-frequency-info: func6_inaccurate_branch_weight
+; BFI_AFTER: - entry: {{.*}} count = 1024
+; BFI_AFTER: - loop_body: {{.*}} count = 4294967296
+; BFI_AFTER: - loop_exit: {{.*}} count = 1024
+
+; IR-LABEL: define void @func6_inaccurate_branch_weight(
+; IR: entry:
+; IR:   br label %loop_body
+; IR: loop_body:
+; IR:   br i1 %cmp, label %loop_body, label %loop_exit, !prof [[PROF_FUNC6_0:![0-9]+]]
+; IR: loop_exit:
+; IR:   ret void
+
+; Branch weight from sample-based PGO may be inaccurate due to sampling.
+; Count for loop_body in following case should be not less than loop_exit.
+; However this may not hold for Sample-based PGO.
+define void @func6_inaccurate_branch_weight() !prof !3 {
+entry:
+  br label %loop_header
+
+loop_header:
+  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
+  %cmp = icmp slt i32 %i, 2
+  br i1 %cmp, label %loop_body, label %loop_exit, !prof !9
+
+loop_body:
+  store volatile i32 %i, ptr @g, align 4
+  %i_inc = add i32 %i, 1
+  br label %loop_header
+
+loop_exit:
+  ret void
+}
+
 !0 = !{!"function_entry_count", i64 1}
 !1 = !{!"branch_weights", i32 1000, i32 1}
 !2 = !{!"branch_weights", i32 3000, i32 1000}
@@ -241,6 +281,7 @@ loop_exit:
 !6 = !{!"branch_weights", i32 0, i32 1}
 !7 = !{!"branch_weights", i32 1, i32 0}
 !8 = !{!"branch_weights", i32 0, i32 0}
+!9 = !{!"branch_weights", i32 1023, i32 1024}
 
 ; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000}
 ; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1}
@@ -251,3 +292,4 @@ loop_exit:
 ; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1}
 ; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0}
 ; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0}
+; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 -1, i32 1024}

>From 2b3c4a2afa0c05ec580c0474173bd483dfa5ea09 Mon Sep 17 00:00:00 2001
From: Haohai Wen <haohai.wen at intel.com>
Date: Mon, 25 Mar 2024 20:20:31 +0800
Subject: [PATCH 2/3] [LoopRotate] Set loop back edge weight to not less than
 exit weight

Branch weight from sample-based PGO may be inaccurate due to
sampling. If the loop body must be executed, then the original loop
back edge weight must be not less than the exit weight.
---
 llvm/lib/Transforms/Utils/LoopRotationUtils.cpp        | 10 ++++++++++
 .../Transforms/LoopRotate/update-branch-weights.ll     |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index bc671171137199..dd31cbd8376c4b 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -347,9 +347,19 @@ static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI,
         // probabilities as if there are only 0-trip and 1-trip cases.
         ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight;
       }
+    } else {
+      if (OrigLoopExitWeight > OrigLoopBackedgeWeight) {
+        LLVM_DEBUG(
+            dbgs() << "WARNING: Bad loop back edge weight. Adjust it from "
+                   << OrigLoopBackedgeWeight << " to " << OrigLoopExitWeight
+                   << "\n");
+        OrigLoopBackedgeWeight = OrigLoopExitWeight;
+      }
     }
+    assert(OrigLoopExitWeight >= ExitWeight0 && "Bad branch weight");
     ExitWeight1 = OrigLoopExitWeight - ExitWeight0;
     EnterWeight = ExitWeight1;
+    assert(OrigLoopBackedgeWeight >= EnterWeight && "Bad branch weight");
     LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight;
   } else if (OrigLoopExitWeight == 0) {
     if (OrigLoopBackedgeWeight == 0) {
diff --git a/llvm/test/Transforms/LoopRotate/update-branch-weights.ll b/llvm/test/Transforms/LoopRotate/update-branch-weights.ll
index acb2038d17bb83..9a1f36ec5ff2be 100644
--- a/llvm/test/Transforms/LoopRotate/update-branch-weights.ll
+++ b/llvm/test/Transforms/LoopRotate/update-branch-weights.ll
@@ -240,7 +240,7 @@ loop_exit:
 
 ; BFI_AFTER-LABEL: block-frequency-info: func6_inaccurate_branch_weight
 ; BFI_AFTER: - entry: {{.*}} count = 1024
-; BFI_AFTER: - loop_body: {{.*}} count = 4294967296
+; BFI_AFTER: - loop_body: {{.*}} count = 1024
 ; BFI_AFTER: - loop_exit: {{.*}} count = 1024
 
 ; IR-LABEL: define void @func6_inaccurate_branch_weight(
@@ -292,4 +292,4 @@ loop_exit:
 ; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1}
 ; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0}
 ; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0}
-; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 -1, i32 1024}
+; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 0, i32 1024}

>From db9a07872aa48a30d00a2dea7003c58b29f93335 Mon Sep 17 00:00:00 2001
From: Haohai Wen <haohai.wen at intel.com>
Date: Thu, 28 Mar 2024 15:09:39 +0800
Subject: [PATCH 3/3] Address comment

---
 llvm/lib/Transforms/Utils/LoopRotationUtils.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index dd31cbd8376c4b..0f55af3b6eddf8 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -348,13 +348,13 @@ static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI,
         ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight;
       }
     } else {
-      if (OrigLoopExitWeight > OrigLoopBackedgeWeight) {
-        LLVM_DEBUG(
-            dbgs() << "WARNING: Bad loop back edge weight. Adjust it from "
-                   << OrigLoopBackedgeWeight << " to " << OrigLoopExitWeight
-                   << "\n");
+      // Theoretically, if the loop body must be executed at least once, the
+      // backedge count must be not less than exit count. However the branch
+      // weight collected by sampling-based PGO may be not very accurate due to
+      // sampling. Therefore this workaround is required here to avoid underflow
+      // of unsigned in following update of branch weight.
+      if (OrigLoopExitWeight > OrigLoopBackedgeWeight)
         OrigLoopBackedgeWeight = OrigLoopExitWeight;
-      }
     }
     assert(OrigLoopExitWeight >= ExitWeight0 && "Bad branch weight");
     ExitWeight1 = OrigLoopExitWeight - ExitWeight0;



More information about the llvm-commits mailing list