[llvm] [unroll-and-jam] Document dependency patterns in dependencies.ll (NFC) (PR #156577)

Sebastian Pop via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 2 20:29:05 PDT 2025


https://github.com/sebpop created https://github.com/llvm/llvm-project/pull/156577

Add detailed comments explaining each function's memory access patterns and why they should/shouldn't be unroll-and-jammed:

- fore_aft_*: Dependencies between fore block and aft block
- fore_sub_*: Dependencies between fore block and sub block
- sub_aft_*: Dependencies between sub block and aft block
- sub_sub_*: Dependencies within sub block

- *_less: Backward dependency (i-1) - safe for fore/aft, fore/sub, sub/aft; unsafe for sub/sub due to jamming conflicts
- *_eq: Same iteration dependency (i+0) - safe due to preserved execution order
- *_more: Forward dependency (i+1) - unsafe due to write-after-write races between unrolled iterations, except sub/sub case creates conflicts

>From b18f55b4644d337d49fb6daa7970f7a01d06463f Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Tue, 2 Sep 2025 21:23:27 -0500
Subject: [PATCH] [unroll-and-jam] Document dependency patterns in
 dependencies.ll (NFC)

Add detailed comments explaining each function's memory access patterns
and why they should/shouldn't be unroll-and-jammed:

- fore_aft_*: Dependencies between fore block and aft block
- fore_sub_*: Dependencies between fore block and sub block
- sub_aft_*: Dependencies between sub block and aft block
- sub_sub_*: Dependencies within sub block

- *_less: Backward dependency (i-1) - safe for fore/aft, fore/sub, sub/aft;
  unsafe for sub/sub due to jamming conflicts
- *_eq: Same iteration dependency (i+0) - safe due to preserved execution order
- *_more: Forward dependency (i+1) - unsafe due to write-after-write races
  between unrolled iterations, except sub/sub case creates conflicts
---
 .../LoopUnrollAndJam/dependencies.ll          | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll
index 26fb0f9a33da0..8dfe4b1dbf3af 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll
@@ -8,6 +8,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; fore_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Fore block: A[i] = 1      (write in outer loop before inner)
+;   - Aft block:  A[i-1] = sum  (write in outer loop after inner)
+; No dependency conflict: The fore block write A[i] and aft block write A[i-1]
+; access different array elements, so unrolling the outer loop and jamming the
+; inner loop is safe. The backward dependency (i-1) doesn't create conflicts
+; between different unrolled iterations.
 define void @fore_aft_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -48,6 +57,15 @@ cleanup:
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; fore_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Fore block: A[i] = 1    (write in outer loop before inner)
+;   - Aft block:  A[i] = sum  (write in outer loop after inner)
+; Dependency conflict: Both fore and aft blocks write to A[i], creating a
+; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
+; because the aft block write always happens after the fore block write in
+; the same iteration, preserving the original execution order.
 define void @fore_aft_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -86,6 +104,15 @@ cleanup:
 ; CHECK-LABEL: fore_aft_more
 ; CHECK: %j = phi
 ; CHECK-NOT: %j.1 = phi
+;
+; fore_aft_more should NOT be unroll-and-jammed due to a dependency violation.
+; Memory accesses:
+;   - Fore block: A[i] = 1      (write in outer loop before inner)
+;   - Aft block:  A[i+1] = sum  (write in outer loop after inner)
+; Dependency conflict: The fore block writes A[i] and aft block writes A[i+1].
+; When unroll-and-jamming, iteration i's aft block writes A[i+1] which conflicts
+; with iteration i+1's fore block write to A[i+1], creating a write-after-write
+; race condition that violates the original sequential semantics.
 define void @fore_aft_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -126,6 +153,14 @@ cleanup:
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; fore_sub_less SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Fore block: A[i] = 1      (write in outer loop before inner)
+;   - Sub block:  A[i-1] = sum  (write inside inner loop)
+; No dependency conflict: The fore block writes A[i] and sub block writes A[i-1].
+; These access different array elements, so unroll-and-jam is safe. The backward
+; dependency pattern doesn't create conflicts between unrolled iterations.
 define void @fore_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -166,6 +201,15 @@ cleanup:
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; fore_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Fore block: A[i] = 1    (write in outer loop before inner)
+;   - Sub block:  A[i] = sum  (write inside inner loop)
+; Dependency conflict: Both fore and sub blocks write to A[i], creating a
+; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
+; because the execution order is preserved: fore block executes first, then
+; the entire inner loop (sub block) executes, maintaining the original semantics.
 define void @fore_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -204,6 +248,15 @@ cleanup:
 ; CHECK-LABEL: fore_sub_more
 ; CHECK: %j = phi
 ; CHECK-NOT: %j.1 = phi
+;
+; fore_sub_more should NOT be unroll-and-jammed due to a dependency violation.
+; Memory accesses:
+;   - Fore block: A[i] = 1      (write in outer loop before inner)
+;   - Sub block:  A[i+1] = sum  (write inside inner loop)
+; Dependency conflict: The fore block writes A[i] and sub block writes A[i+1].
+; When unroll-and-jamming, iteration i's fore block writes A[i] but iteration i's
+; sub block writes A[i+1]. This conflicts with iteration i+1's fore block write
+; to A[i+1], creating a write-after-write race condition.
 define void @fore_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -244,6 +297,14 @@ cleanup:
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; sub_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Sub block: A[i] = 1      (write inside inner loop)
+;   - Aft block: A[i-1] = sum  (write in outer loop after inner)
+; No dependency conflict: The sub block writes A[i] and aft block writes A[i-1].
+; These access different array elements, so unroll-and-jam is safe. The backward
+; dependency pattern doesn't create conflicts between unrolled iterations.
 define void @sub_aft_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -284,6 +345,15 @@ cleanup:
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; sub_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Sub block: A[i] = 1    (write inside inner loop)
+;   - Aft block: A[i] = sum  (write in outer loop after inner)
+; Dependency conflict: Both sub and aft blocks write to A[i], creating a
+; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
+; because the execution order is preserved: the entire inner loop (sub block)
+; executes first, then the aft block executes, maintaining original semantics.
 define void @sub_aft_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -322,6 +392,15 @@ cleanup:
 ; CHECK-LABEL: sub_aft_more
 ; CHECK: %j = phi
 ; CHECK-NOT: %j.1 = phi
+;
+; sub_aft_more should NOT be unroll-and-jammed due to a dependency violation.
+; Memory accesses:
+;   - Sub block: A[i] = 1      (write inside inner loop)
+;   - Aft block: A[i+1] = sum  (write in outer loop after inner)
+; Dependency conflict: The sub block writes A[i] and aft block writes A[i+1].
+; When unroll-and-jamming, iteration i's aft block writes A[i+1] which conflicts
+; with iteration i+1's sub block write to A[i+1], creating a write-after-write
+; race condition that violates the original sequential semantics.
 define void @sub_aft_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -360,6 +439,15 @@ cleanup:
 ; CHECK-LABEL: sub_sub_less
 ; CHECK: %j = phi
 ; CHECK-NOT: %j.1 = phi
+;
+; sub_sub_less should NOT be unroll-and-jammed due to a dependency violation.
+; Memory accesses:
+;   - Sub block: A[i] = 1      (write inside inner loop)
+;   - Sub block: A[i-1] = sum  (write inside inner loop)
+; Dependency conflict: Both writes are in the sub block (inner loop), accessing
+; A[i] and A[i-1]. When unroll-and-jamming, the inner loop is jammed, meaning
+; iterations of the inner loop from different outer iterations execute together.
+; This creates a backward dependency that can cause race conditions.
 define void @sub_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -400,6 +488,15 @@ cleanup:
 ; CHECK: %j.1 = phi
 ; CHECK: %j.2 = phi
 ; CHECK: %j.3 = phi
+;
+; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+;   - Sub block: A[i] = 1    (write inside inner loop)
+;   - Sub block: A[i] = sum  (write inside inner loop)
+; Dependency conflict: Both writes are to A[i] within the sub block, creating a
+; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
+; because both writes are in the same basic block and maintain their relative
+; order: A[i] = 1 always executes before A[i] = sum in each iteration.
 define void @sub_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0
@@ -438,6 +535,15 @@ cleanup:
 ; CHECK-LABEL: sub_sub_more
 ; CHECK: %j = phi
 ; CHECK-NOT: %j.1 = phi
+;
+; sub_sub_more should NOT be unroll-and-jammed due to a dependency violation.
+; Memory accesses:
+;   - Sub block: A[i] = 1      (write inside inner loop)
+;   - Sub block: A[i+1] = sum  (write inside inner loop)
+; Dependency conflict: Both writes are in the sub block, accessing A[i] and A[i+1].
+; When unroll-and-jamming, iteration i's sub block writes A[i+1] which conflicts
+; with iteration i+1's sub block write to A[i+1]. This creates a forward
+; dependency that causes write-after-write race conditions.
 define void @sub_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
 entry:
   %cmp = icmp sgt i32 %N, 0



More information about the llvm-commits mailing list