[llvm] a80dd44 - LAA: pre-commit tests for stride-versioning (#97570)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 04:11:22 PDT 2024
Author: Ramkumar Ramachandra
Date: 2024-08-21T12:11:19+01:00
New Revision: a80dd44b0d96fa3ba3fe0501c3ad4b1ee7edff00
URL: https://github.com/llvm/llvm-project/commit/a80dd44b0d96fa3ba3fe0501c3ad4b1ee7edff00
DIFF: https://github.com/llvm/llvm-project/commit/a80dd44b0d96fa3ba3fe0501c3ad4b1ee7edff00.diff
LOG: LAA: pre-commit tests for stride-versioning (#97570)
Add tests for when the Stride is unknown and equal to TC, with different
kinds of casts. In these cases, LAA should not speculate on Stride.
Added:
Modified:
llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
Removed:
################################################################################
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index f0aed2421a96e5..1e12dbf3bbee31 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -310,3 +310,208 @@ loop:
exit: ; preds = %loop
ret void
}
+
+; Check the scenario where we have an unknown Stride, which happens to also be
+; the loop iteration count. If we speculate Stride==1, it implies that the loop
+; will iterate no more than a single iteration.
+define void @unknown_stride_equalto_tc(i32 %N, ptr %A, ptr %B, i32 %j) { ; %N is used below as both the index stride and the loop bound
+; CHECK-LABEL: 'unknown_stride_equalto_tc'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT: ptr %A
+; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP5]]:
+; CHECK-NEXT: (Low: %A High: (4 + %A))
+; CHECK-NEXT: Member: %A
+; CHECK-NEXT: Group [[GRP6]]:
+; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B))))
+; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {%j,+,%N}<%loop> Added Flags: <nssw>
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
+; CHECK-NEXT: ((2 * (sext i32 {%j,+,%N}<%loop> to i64))<nsw> + %B)
+; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
+;
+entry:
+ %cmp = icmp eq i32 %N, 0 ; guard: bypass the loop when %N is zero
+ br i1 %cmp, label %exit, label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable, starts at 0
+ %mul = mul i32 %iv, %N ; index grows by %N each iteration, i.e. %N is the symbolic stride
+ %add = add i32 %mul, %j ; shift the strided index by the loop-invariant %j
+ %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add ; address of i16 element %add of %B
+ %load = load i16, ptr %arrayidx
+ %sext = sext i16 %load to i32 ; widen the loaded value to the stored width
+ store i32 %sext, ptr %A ; store through %A, which does not change inside the loop
+ %iv.next = add nuw i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %N ; leave once %iv.next reaches %N: the stride value also bounds the iteration count
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+
+; Check the scenario where we have an unknown Stride, which happens to also be
+; the loop iteration count, but the TC is zero-extended from a narrower type.
+define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 %j) { ; the i16 %N is zero-extended and used as both stride and loop bound
+; CHECK-LABEL: 'unknown_stride_equalto_zext_tc'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP7:0x[0-9a-f]+]]):
+; CHECK-NEXT: ptr %A
+; CHECK-NEXT: Against group ([[GRP8:0x[0-9a-f]+]]):
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP7]]:
+; CHECK-NEXT: (Low: %A High: (4 + %A))
+; CHECK-NEXT: Member: %A
+; CHECK-NEXT: Group [[GRP8]]:
+; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
+; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {%j,+,(zext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
+; CHECK-NEXT: ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
+; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
+;
+entry:
+ %N.ext = zext i16 %N to i32 ; widened copy of %N; serves as stride and loop bound below
+ %cmp = icmp eq i16 %N, 0 ; guard on the narrow value: bypass the loop when %N is zero
+ br i1 %cmp, label %exit, label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable, starts at 0
+ %mul = mul nuw i32 %iv, %N.ext ; index grows by %N.ext each iteration, i.e. the symbolic stride
+ %add = add i32 %mul, %j ; shift the strided index by the loop-invariant %j
+ %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add ; address of i16 element %add of %B
+ %load = load i16, ptr %arrayidx
+ %sext = sext i16 %load to i32 ; widen the loaded value to the stored width
+ store i32 %sext, ptr %A ; store through %A, which does not change inside the loop
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %N.ext ; leave once %iv.next reaches %N.ext: the stride value also bounds the iteration count
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Check the scenario where we have an unknown Stride, which happens to also be
+; the loop iteration count, but the TC is sign-extended from a narrower type.
+define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) { ; the i16 %N is sign-extended and used as both stride and loop bound
+; CHECK-LABEL: 'unknown_stride_equalto_sext_tc'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP9:0x[0-9a-f]+]]):
+; CHECK-NEXT: ptr %A
+; CHECK-NEXT: Against group ([[GRP10:0x[0-9a-f]+]]):
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP9]]:
+; CHECK-NEXT: (Low: %A High: (4 + %A))
+; CHECK-NEXT: Member: %A
+; CHECK-NEXT: Group [[GRP10]]:
+; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
+; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {%j,+,(sext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
+; CHECK-NEXT: ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
+; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
+;
+entry:
+ %N.ext = sext i16 %N to i32 ; sign-extended copy of %N; serves as stride and loop bound below
+ %cmp = icmp eq i16 %N, 0 ; guard on the narrow value: bypass the loop when %N is zero
+ br i1 %cmp, label %exit, label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable, starts at 0
+ %mul = mul nuw i32 %iv, %N.ext ; index grows by %N.ext each iteration, i.e. the symbolic stride
+ %add = add i32 %mul, %j ; shift the strided index by the loop-invariant %j
+ %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add ; address of i16 element %add of %B
+ %load = load i16, ptr %arrayidx
+ %sext = sext i16 %load to i32 ; widen the loaded value to the stored width
+ store i32 %sext, ptr %A ; store through %A, which does not change inside the loop
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %N.ext ; leave once %iv.next equals %N.ext: the stride value also bounds the iteration count
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Check the scenario where we have an unknown Stride, which happens to also be
+; the loop iteration count, but the TC is truncated from a wider type.
+define void @unknown_stride_equalto_trunc_tc(i64 %N, ptr %A, ptr %B, i32 %j) { ; the i64 %N is truncated and used as both stride and loop bound
+; CHECK-LABEL: 'unknown_stride_equalto_trunc_tc'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP11:0x[0-9a-f]+]]):
+; CHECK-NEXT: ptr %A
+; CHECK-NEXT: Against group ([[GRP12:0x[0-9a-f]+]]):
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP11]]:
+; CHECK-NEXT: (Low: %A High: (4 + %A))
+; CHECK-NEXT: Member: %A
+; CHECK-NEXT: Group [[GRP12]]:
+; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B))))
+; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {%j,+,(trunc i64 %N to i32)}<nw><%loop> Added Flags: <nssw>
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
+; CHECK-NEXT: ((2 * (sext i32 {%j,+,(trunc i64 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
+; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
+;
+entry:
+ %N.trunc = trunc i64 %N to i32 ; low 32 bits of %N; serves as stride and loop bound below
+ %cmp = icmp eq i64 %N, 0 ; NOTE(review): guard tests the full i64 %N while the loop bound is %N.trunc - confirm this is intended
+ br i1 %cmp, label %exit, label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable, starts at 0
+ %mul = mul nuw i32 %iv, %N.trunc ; index grows by %N.trunc each iteration, i.e. the symbolic stride
+ %add = add i32 %mul, %j ; shift the strided index by the loop-invariant %j
+ %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add ; address of i16 element %add of %B
+ %load = load i16, ptr %arrayidx
+ %sext = sext i16 %load to i32 ; widen the loaded value to the stored width
+ store i32 %sext, ptr %A ; store through %A, which does not change inside the loop
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %N.trunc ; leave once %iv.next equals %N.trunc: the stride value also bounds the iteration count
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list