[llvm] [unroll-and-jam] Document dependencies_multidims.ll and fix loop bounds (NFC) (PR #156578)
Sebastian Pop via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 20:29:42 PDT 2025
https://github.com/sebpop created https://github.com/llvm/llvm-project/pull/156578
Add detailed comments explaining why each function should/shouldn't be unroll-and-jammed based on memory access patterns and dependencies.
Fix loop bounds to ensure array accesses are within array bounds:
* sub_sub_less: j starts from 1 (not 0) to ensure j-1 >= 0
* sub_sub_less_3d: k starts from 1 (not 0) to ensure k-1 >= 0
* sub_sub_outer_scalar: j starts from 1 (not 0) to ensure j-1 >= 0
Also change the constants stored in sub_sub_less_3d from 0 to 5 and 10.
From 823c70d06269a75f5877093b1e797bebfbc27380 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Tue, 2 Sep 2025 21:18:57 -0500
Subject: [PATCH] [unroll-and-jam] Document dependencies_multidims.ll and fix
loop bounds (NFC)
- Add detailed comments explaining why each function should/shouldn't be
unroll-and-jammed based on memory access patterns and dependencies.
- Fix loop bounds to ensure array accesses are within array bounds:
* sub_sub_less: j starts from 1 (not 0) to ensure j-1 >= 0
* sub_sub_less_3d: k starts from 1 (not 0) to ensure k-1 >= 0
* sub_sub_outer_scalar: j starts from 1 (not 0) to ensure j-1 >= 0
---
.../dependencies_multidims.ll | 64 +++++++++++++++----
1 file changed, 52 insertions(+), 12 deletions(-)
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies_multidims.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_multidims.ll
index b95bbddf11d65..2a2b8c958b31d 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/dependencies_multidims.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_multidims.ll
@@ -1,11 +1,17 @@
; RUN: opt -da-disable-delinearization-checks -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
; RUN: opt -da-disable-delinearization-checks -aa-pipeline=basic-aa -passes='loop-unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-
; CHECK-LABEL: sub_sub_less
; CHECK: %j = phi
; CHECK-NOT: %j.1 = phi
+;
+; sub_sub_less should NOT be unroll-and-jammed due to a loop-carried dependency.
+; Memory accesses:
+; - A[i][j] = 1 (write to current iteration)
+; - A[i+1][j-1] = add (write to next i iteration, previous j iteration)
+; The dependency: A[i+1][j-1] from iteration (i,j) may conflict with A[i'][j']
+; from a later iteration when i'=i+1 and j'=j-1, creating a backward dependency
+; in the j dimension that prevents safe unroll-and-jam.
define void @sub_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
@@ -16,7 +22,7 @@ for.outer:
br label %for.inner
for.inner:
- %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+ %j = phi i32 [ %add6, %for.inner ], [ 1, %for.outer ]
%sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
%arrayidx5 = getelementptr inbounds i32, ptr %B, i32 %j
%0 = load i32, ptr %arrayidx5, align 4
@@ -47,6 +53,14 @@ cleanup:
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
+;
+; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+; - A[i][j] = 1 (write to current iteration)
+; - A[i+1][j] = add (write to next i iteration, same j iteration)
+; No unsafe dependency: A[i+1][j] written in iteration (i,j) is written again
+; as A[i'][j'] only in iteration (i+1,j), i.e. with the same j. Jamming the
+; unrolled copies of the i loop keeps those two writes in their original order.
define void @sub_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
@@ -88,6 +102,14 @@ cleanup:
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
+;
+; sub_sub_more SHOULD be unroll-and-jammed (count=4) as it's safe.
+; Memory accesses:
+; - A[i][j] = 1 (write to current iteration)
+; - A[i+1][j+1] = add (write to next i iteration, next j iteration)
+; No unsafe dependency: A[i+1][j+1] written in iteration (i,j) is written again
+; as A[i'][j'] only in iteration (i+1,j+1), i.e. at a later j. That forward
+; dependency keeps its original order after the unrolled i copies are jammed.
define void @sub_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
@@ -126,12 +148,21 @@ cleanup:
; CHECK-LABEL: sub_sub_less_3d
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi
-
+;
+; sub_sub_less_3d should NOT be unroll-and-jammed due to a loop-carried dependency.
+; Memory accesses:
+; - A[i][j][k] = 5 (write to current iteration)
+; - A[i+1][j][k-1] = 10 (write to next i iteration, previous k iteration)
+; The dependency: A[i+1][j][k-1] from iteration (i,j,k) may conflict with
+; A[i'][j'][k'] from a later iteration when i'=i+1, j'=j and k'=k-1, creating a
+; backward dependency in the k dimension that prevents safe unroll-and-jam.
+; This is a 3D version of the same pattern as sub_sub_less.
+;
; for (long i = 0; i < 100; ++i)
; for (long j = 0; j < 100; ++j)
-; for (long k = 0; k < 100; ++k) {
-; A[i][j][k] = 0;
-; A[i+1][j][k-1] = 0;
+; for (long k = 1; k < 100; ++k) {
+; A[i][j][k] = 5;
+; A[i+1][j][k-1] = 10;
; }
define void @sub_sub_less_3d(ptr noalias %A) {
@@ -147,13 +178,13 @@ for.j:
br label %for.k
for.k:
- %k = phi i32 [ 0, %for.j ], [ %inc.k, %for.k ]
+ %k = phi i32 [ 1, %for.j ], [ %inc.k, %for.k ]
%arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr %A, i32 %i, i32 %j, i32 %k
- store i32 0, ptr %arrayidx, align 4
+ store i32 5, ptr %arrayidx, align 4
%add.i = add nsw i32 %i, 1
%sub.k = add nsw i32 %k, -1
%arrayidx2 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i32 %add.i, i32 %j, i32 %sub.k
- store i32 0, ptr %arrayidx2, align 4
+ store i32 10, ptr %arrayidx2, align 4
%inc.k = add nsw i32 %k, 1
%cmp.k = icmp slt i32 %inc.k, 100
br i1 %cmp.k, label %for.k, label %for.j.latch
@@ -175,7 +206,16 @@ for.end:
; CHECK-LABEL: sub_sub_outer_scalar
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi
-
+;
+; sub_sub_outer_scalar should NOT be unroll-and-jammed due to a loop-carried dependency.
+; Memory accesses:
+; - load from A[j][k] (read from current j iteration)
+; - store to A[j-1][k] (write to previous j iteration)
+; The dependency: reading A[j][k] and writing A[j-1][k] creates a backward
+; dependency in the j dimension. The test attempts to unroll-and-jam the j loop
+; with the k loop being jammed. When this happens, iterations j, j+1, j+2, j+3
+; would be unrolled and their k loops jammed together, but j+1's write to A[j][k]
+; would conflict with j's read from A[j][k], violating sequential semantics.
define void @sub_sub_outer_scalar(ptr %A) {
entry:
br label %for.i
@@ -185,7 +225,7 @@ for.i:
br label %for.j
for.j:
- %j = phi i64 [ 0, %for.i ], [ %inc.j, %for.j.latch ]
+ %j = phi i64 [ 1, %for.i ], [ %inc.j, %for.j.latch ]
br label %for.k
for.k:
More information about the llvm-commits
mailing list