[llvm] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking (PR #130617)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 27 02:03:06 PDT 2025
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/130617
From 5d4946dfcda1a46f88541c4586214e41b4eccf80 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH 1/4] [SeparateConstOffsetFromGEP] Preserve inbounds flag based
on ValueTracking
If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.
For SWDEV-516125.
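For illustration (not itself part of the patch), a minimal IR sketch of the intended effect, mirroring the must_be_inbounds test added below: the index is a zext and therefore known non-negative, so both GEPs produced by the split may keep the inbounds flag.

  ; before the pass
  %i.prom = zext i32 %i to i64                        ; known non-negative
  %idx = add nsw i64 %i.prom, 1
  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx

  ; after separate-const-offset-from-gep with this change
  %i.prom = zext i32 %i to i64
  %0 = getelementptr inbounds i32, ptr %p, i64 %i.prom
  %arrayidx2 = getelementptr inbounds i8, ptr %0, i64 4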
---
.../Scalar/SeparateConstOffsetFromGEP.cpp | 18 +++++++++++----
.../AMDGPU/preserve-inbounds.ll | 23 +++++++++++++++++++
.../NVPTX/split-gep-and-gvn.ll | 16 ++++++-------
.../NVPTX/split-gep.ll | 8 +++----
4 files changed, 48 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index ab8e979e7b40a..7f93115499bc9 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
}
}
+ bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
// Remove the constant offset in each sequential index. The resultant GEP
// computes the variadic base.
// Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// and the old index if they are not used.
RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+ MayRecoverInbounds =
+ MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
}
}
}
@@ -1100,11 +1104,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// address with silently-wrapping two's complement arithmetic".
// Therefore, the final code will be a semantically equivalent.
//
- // TODO(jingyue): do some range analysis to keep as many inbounds as
- // possible. GEPs with inbounds are more friendly to alias analysis.
- // TODO(gep_nowrap): Preserve nuw at least.
- GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
- GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+ // If the initial GEP was inbounds and all variable indices and the
+ // accumulated offsets are non-negative, they can be added in any order and
+ // the intermediate results are in bounds. So, we can preserve the inbounds
+ // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis.
+ //
+ // TODO(gep_nowrap): Preserve nuw?
+ GEPNoWrapFlags NewGEPFlags =
+ MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+ GEP->setNoWrapFlags(NewGEPFlags);
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
if (LowerGEP) {
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 422e5d8215502..01619aa481ddd 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -16,3 +16,26 @@ entry:
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
ret ptr %arrayidx
}
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT: [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT: [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
+; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %i.prom = zext i32 %i to i64
+ %idx = add nsw i64 %i.prom, 1
+ %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+ %3 = load float, ptr %arrayidx.src, align 4
+ %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+ store float %3, ptr %arrayidx.dst, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..4474585bf9b06 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
; IR-NEXT: .preheader:
; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64
; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
@@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
; IR-NEXT: .preheader:
; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64
; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 77b3434f4f159..da04a6e979425 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) {
; CHECK-LABEL: define ptr @trunk_explicit(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
; CHECK-NEXT: ret ptr [[PTR21]]
;
entry:
@@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) {
; CHECK-LABEL: define ptr @trunk_long_idx(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
; CHECK-NEXT: ret ptr [[PTR21]]
;
entry:
From 7af5dfd84930aadeb21d5e89cef77bfdf83a2f46 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 11 Mar 2025 10:05:26 -0400
Subject: [PATCH 2/4] Simplify test case and add more
---
.../AMDGPU/preserve-inbounds.ll | 126 ++++++++++++++++--
1 file changed, 112 insertions(+), 14 deletions(-)
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 01619aa481ddd..6865fcf663c58 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -17,25 +17,123 @@ entry:
ret ptr %arrayidx
}
-; All offsets must be positive, so inbounds can be preserved.
-define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; All indices must be non-negative, so inbounds can be preserved.
+define ptr @must_be_inbounds(ptr %p, i32 %i) {
; CHECK-LABEL: @must_be_inbounds(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
-; CHECK-NEXT: [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
-; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
-; CHECK-NEXT: [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
-; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
;
entry:
%i.prom = zext i32 %i to i64
%idx = add nsw i64 %i.prom, 1
- %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
- %3 = load float, ptr %arrayidx.src, align 4
- %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
- store float %3, ptr %arrayidx.dst, align 4
- ret void
+ %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+; idx must be non-negative -> preserve inbounds
+define ptr @sign_bit_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @sign_bit_clear(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX]]
+;
+entry:
+ %idx = and i64 %i, u0x7fffffffffffffff
+ %idx.add = add i64 %idx, 1
+ %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+ ret ptr %arrayidx
+}
+
+; idx may be negative -> don't preserve inbounds
+define ptr @sign_bit_not_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @sign_bit_not_clear(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -256
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = and i64 %i, u0xffffffffffffff00
+ %idx.add = add i64 %idx, 1
+ %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+ ret ptr %arrayidx
+}
+
+; idx may be 0 or very negative -> don't preserve inbounds
+define ptr @only_sign_bit_not_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @only_sign_bit_not_clear(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -9223372036854775808
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = and i64 %i, u0x8000000000000000
+ %idx.add = add i64 %idx, 1
+ %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+ ret ptr %arrayidx
+}
+
+; all indices non-negative -> preserve inbounds
+define ptr @multi_level_nonnegative(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_nonnegative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255
+; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 180
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %masked.idx1 = and i64 %idx1, u0xff
+ %masked.idx2 = and i64 %idx2, u0xffff
+ %idx1.add = add i64 %masked.idx1, 2
+ %idx2.add = add i64 %masked.idx2, 5
+ %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+ ret ptr %arrayidx
+}
+
+; It doesn't matter that %idx2.add might be negative; the indices in the resulting GEPs are all non-negative -> preserve inbounds
+define ptr @multi_level_mixed_okay(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_mixed_okay(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255
+; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 156
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %masked.idx1 = and i64 %idx1, u0xff
+ %masked.idx2 = and i64 %idx2, u0xffff
+ %idx1.add = add i64 %masked.idx1, 2
+ %idx2.add = add i64 %masked.idx2, -1
+ %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+ ret ptr %arrayidx
+}
+
+; One index may be negative -> don't preserve inbounds
+define ptr @multi_level_mixed_not_okay(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_mixed_not_okay(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], -256
+; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 156
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %masked.idx1 = and i64 %idx1, u0xffffffffffffff00
+ %masked.idx2 = and i64 %idx2, u0xffff
+ %idx1.add = add i64 %masked.idx1, 2
+ %idx2.add = add i64 %masked.idx2, -1
+ %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+ ret ptr %arrayidx
}
From d467f0def6ff9f8c0ab23a80be24271b84f5ed37 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Fri, 21 Mar 2025 09:33:18 -0400
Subject: [PATCH 3/4] Use isKnownNonNegative, add available information to the
SimplifyQuery
---
llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 7f93115499bc9..54459fd08d94c 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1082,7 +1082,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
MayRecoverInbounds =
- MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
+ MayRecoverInbounds &&
+ isKnownNonNegative(
+ NewIdx,
+ SimplifyQuery(*DL, TLI, DT, /*AC=*/nullptr, /*CXTI=*/GEP));
}
}
}
From 40889cc447b19d0b9195f4743636a13b0f3c4056 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Thu, 27 Mar 2025 05:00:29 -0400
Subject: [PATCH 4/4] Preserve more GEP flags based on NUW
---
.../Scalar/SeparateConstOffsetFromGEP.cpp | 94 +++-
.../AMDGPU/preserve-inbounds.ll | 405 +++++++++++++++++-
2 files changed, 480 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 54459fd08d94c..f0df0063082bd 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -235,8 +235,10 @@ class ConstantOffsetExtractor {
/// \p GEP The given GEP
/// \p UserChainTail Outputs the tail of UserChain so that we can
/// garbage-collect unused instructions in UserChain.
+ /// \p PreservesNUW Outputs whether the extraction allows preserving the
+ /// GEP's nuw flag, if it has one.
static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
- User *&UserChainTail);
+ User *&UserChainTail, bool &PreservesNUW);
/// Looks for a constant offset from the given GEP index without extracting
/// it. It returns the numeric value of the extracted constant offset (0 if
@@ -267,6 +269,13 @@ class ConstantOffsetExtractor {
APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
bool ZeroExtended);
+ /// A helper function to check if a subsequent call to rebuildWithoutConst
+ /// will allow preserving the GEP's nuw flag. That is the case if all
+ /// reassociated binary operations are add nuw and no non-nuw trunc is
+ /// distributed through an add.
+ /// Can only be called after find has populated the UserChain.
+ bool checkRebuildingPreservesNUW() const;
+
/// After finding the constant offset C from the GEP index I, we build a new
/// index I' s.t. I' + C = I. This function builds and returns the new
/// index I' according to UserChain produced by function "find".
@@ -676,6 +685,30 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
return Current;
}
+bool ConstantOffsetExtractor::checkRebuildingPreservesNUW() const {
+ auto AllowsPreservingNUW = [](User *U) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ auto Opcode = BO->getOpcode();
+ if (Opcode == BinaryOperator::Or) {
+ // Ors are only considered here if they are disjoint. The addition that
+ // they represent in this case is NUW.
+ assert(cast<PossiblyDisjointInst>(BO)->isDisjoint());
+ return true;
+ }
+ return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap();
+ }
+ // UserChain can only contain ConstantInt, CastInst, or BinaryOperator.
+ // Among the possible CastInsts, only trunc without nuw is a problem: If it
+ // is distributed through an add nuw, wrapping may occur:
+ // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)"
+ if (TruncInst *TI = dyn_cast<TruncInst>(U))
+ return TI->hasNoUnsignedWrap();
+ return true;
+ };
+
+ return all_of(UserChain, AllowsPreservingNUW);
+}
+
Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
distributeExtsAndCloneChain(UserChain.size() - 1);
// Remove all nullptrs (used to be s/zext) from UserChain.
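To make the comment in checkRebuildingPreservesNUW above concrete, a small hand-written illustration (hypothetical values, not from the patch) of why a trunc without nuw blocks preservation: with %a = %b = 2^63,

  %sum = add nuw i128 %a, %b            ; 2^64, no unsigned wrap in i128
  %idx = trunc i128 %sum to i64         ; yields 0, not poison

but after the pass distributes the trunc through the add,

  %ta = trunc i128 %a to i64            ; 2^63
  %tb = trunc i128 %b to i64            ; 2^63
  %idx = add nuw i64 %ta, %tb           ; wraps unsigned -> poison

so nuw can only be kept when every trunc in the user chain is itself trunc nuw (see the nuw_inbounds_trunc tests added below).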
@@ -779,7 +812,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
}
Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
- User *&UserChainTail) {
+ User *&UserChainTail,
+ bool &PreservesNUW) {
ConstantOffsetExtractor Extractor(GEP->getIterator());
// Find a non-zero constant offset first.
APInt ConstantOffset =
@@ -787,8 +821,12 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
GEP->isInBounds());
if (ConstantOffset == 0) {
UserChainTail = nullptr;
+ PreservesNUW = true;
return nullptr;
}
+
+ PreservesNUW = Extractor.checkRebuildingPreservesNUW();
+
// Separates the constant offset from the GEP index.
Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
UserChainTail = Extractor.UserChain.back();
@@ -1052,7 +1090,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
}
}
- bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+ // Track information for preserving GEP flags.
+ bool AllOffsetsNonNegative = AccumulativeByteOffset >= 0;
+ bool AllNUWPreserved = true;
// Remove the constant offset in each sequential index. The resultant GEP
// computes the variadic base.
@@ -1072,8 +1112,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// uses the variadic part as the new index.
Value *OldIdx = GEP->getOperand(I);
User *UserChainTail;
- Value *NewIdx =
- ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
+ bool PreservesNUW;
+ Value *NewIdx = ConstantOffsetExtractor::Extract(
+ OldIdx, GEP, UserChainTail, PreservesNUW);
if (NewIdx != nullptr) {
// Switches to the index with the constant offset removed.
GEP->setOperand(I, NewIdx);
@@ -1081,11 +1122,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// and the old index if they are not used.
RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
- MayRecoverInbounds =
- MayRecoverInbounds &&
- isKnownNonNegative(
- NewIdx,
- SimplifyQuery(*DL, TLI, DT, /*AC=*/nullptr, /*CXTI=*/GEP));
+ AllOffsetsNonNegative =
+ AllOffsetsNonNegative && isKnownNonNegative(NewIdx, *DL);
+ AllNUWPreserved &= PreservesNUW;
}
}
}
@@ -1106,15 +1145,34 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// inbounds keyword is not present, the offsets are added to the base
// address with silently-wrapping two's complement arithmetic".
// Therefore, the final code will be a semantically equivalent.
- //
- // If the initial GEP was inbounds and all variable indices and the
+ GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
+
+ // If the initial GEP was inbounds/nusw and all variable indices and the
// accumulated offsets are non-negative, they can be added in any order and
- // the intermediate results are in bounds. So, we can preserve the inbounds
- // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis.
- //
- // TODO(gep_nowrap): Preserve nuw?
- GEPNoWrapFlags NewGEPFlags =
- MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+ // the intermediate results are in bounds and don't overflow in a nusw sense.
+ // So, we can preserve the inbounds/nusw flag for both GEPs.
+ bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative;
+
+ // If the initial GEP was NUW and all operations that we reassociate were NUW
+ // additions, the resulting GEPs are also NUW.
+ if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) {
+ NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap();
+ // If the initial GEP additionally had NUSW (or inbounds, which implies
+ // NUSW), we know that the indices in the initial GEP must all have their
+ // signbit not set. For indices that are the result of NUW adds, the
+ // add-operands therefore also don't have their signbit set. Therefore, all
+ // indices of the resulting GEPs are non-negative -> we can preserve
+ // the inbounds/nusw flag.
+ CanPreserveInBoundsNUSW |= GEP->hasNoUnsignedSignedWrap();
+ }
+
+ if (CanPreserveInBoundsNUSW) {
+ if (GEP->isInBounds())
+ NewGEPFlags |= GEPNoWrapFlags::inBounds();
+ else if (GEP->hasNoUnsignedSignedWrap())
+ NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap();
+ }
+
GEP->setNoWrapFlags(NewGEPFlags);
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
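As a hedged illustration of the flag logic above, mirroring the nuw_inbounds_implies_nuw_inbounds test added below: if the original GEP is inbounds nuw and the extracted addition is add nuw, both resulting GEPs keep inbounds nuw, since per the comment above the nuw adds together with nusw/inbounds on the original GEP imply that all indices are non-negative.

  ; before
  %idx = add nuw i64 %i, 1
  %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx

  ; after
  %0 = getelementptr inbounds nuw i32, ptr %p, i64 %i
  %arrayidx2 = getelementptr inbounds nuw i8, ptr %0, i64 4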
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 6865fcf663c58..a4f3e2b913116 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=separate-const-offset-from-gep -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s
+
+; gfx1200 is particularly interesting since it allows negative immediate offsets
+; in flat instructions, so the transformation is applied in more cases.
; The inbounds flags cannot be preserved here: If the pointers point to the
; beginning of an object and %i is 1, the intermediate GEPs are out of bounds.
@@ -137,3 +140,403 @@ entry:
%arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
ret ptr %arrayidx
}
+
+
+define ptr @nuw_implies_nuw(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_implies_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i64 %i, 1
+ %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_implies_nuw_negative(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_implies_nuw_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i64 %i, -16
+ %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_inbounds_implies_nuw_inbounds(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i64 %i, 1
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+; This is poison anyway, so we can preserve the flags.
+define ptr @nuw_inbounds_implies_nuw_inbounds_negative(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i64 %i, -16
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_nusw_implies_nuw_nusw(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i64 %i, 1
+ %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+; Also poison.
+define ptr @nuw_implies_nuw_nusw_negative(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_implies_nuw_nusw_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i64 %i, -16
+ %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = or disjoint i64 %i, 1
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+; This is poison anyway, so we can do the transformation.
+define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative(ptr %p, i64 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = or disjoint i64 %i, -16
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+; Check that nothing happens for non-disjoint ors
+define ptr @or_no_disjoint(ptr %p, i64 %i) {
+; CHECK-LABEL: @or_no_disjoint(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDX:%.*]] = or i64 [[I:%.*]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[ARRAYIDX]]
+;
+entry:
+ %idx = or i64 %i, 1
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+define ptr @no_nuw_inbounds_for_sub(ptr %p, i64 %i) {
+; CHECK-LABEL: @no_nuw_inbounds_for_sub(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = sub nuw i64 %i, 1
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+define ptr @no_nuw_inbounds_for_sub_negative(ptr %p, i64 %i) {
+; CHECK-LABEL: @no_nuw_inbounds_for_sub_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 64
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = sub nuw i64 %i, -16
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ ret ptr %arrayidx
+}
+
+; Can't preserve nuw and other flags here as distributing the trunc towards the
+; leaves can introduce new wraps.
+define ptr @nuw_inbounds_trunc(ptr %p, i128 %i) {
+; CHECK-LABEL: @nuw_inbounds_trunc(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i128 %i, 1
+ %idx.conv = trunc i128 %idx to i64
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv
+ ret ptr %arrayidx
+}
+
+; trunc nuw is not a problem.
+define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i128 %i, 1
+ %idx.conv = trunc nuw i128 %idx to i64
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_sext(ptr %p, i32 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_sext(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i32 %i, 1
+ %idx.conv = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_zext(ptr %p, i32 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i32 %i, 1
+ %idx.conv = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_zext_negative(ptr %p, i8 %i) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDX_CONV:%.*]] = zext i8 [[I:%.*]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX_CONV]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 960
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+entry:
+ %idx = add nuw i8 %i, -16
+ %idx.conv = zext i8 %idx to i64
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv
+ ret ptr %arrayidx
+}
+
+
+; This test and the following ones mask most bits of %v off to facilitate
+; validation with alive2 while still allowing interesting values.
+define ptr @nuw_inbounds_implies_nuw_inbounds_nested(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V_MASKED:%.*]] = and i64 [[V:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V_MASKED]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, 1
+ %idx2 = add nuw i64 %idx1, %v.masked
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_nested_negative(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, 1
+ %idx2 = add nuw i64 %idx1, %v.masked
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_implies_nuw_nested(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_implies_nuw_nested(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, 1
+ %idx2 = add nuw i64 %idx1, %v.masked
+ %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_implies_nuw_nested_negative(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_implies_nuw_nested_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, -16
+ %idx2 = add nuw i64 %idx1, %v.masked
+ %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_nusw_implies_nuw_nusw_nested(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, 1
+ %idx2 = add nuw i64 %idx1, %v.masked
+ %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_nusw_implies_nuw_nusw_nested_negative(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested_negative(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, 1
+ %idx2 = add nuw i64 %idx1, %v.masked
+ %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+
+; Neither inbounds nor nuw can be preserved.
+define ptr @nuw_inbounds_nested_not_all_nuw(ptr %p, i64 %i, i64 %v) {
+; CHECK-LABEL: @nuw_inbounds_nested_not_all_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601
+; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX22]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+entry:
+ %v.masked = and i64 %v, u0xf0000003c000000f
+ %idx1 = add nuw i64 %i, 1
+ %idx2 = add i64 %idx1, %v.masked
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2
+ ret ptr %arrayidx
+}
+
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel(ptr %src, i64 %i1, i64 %i2) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+ %idx1 = add nuw i64 %i1, 1
+ %idx2 = add nuw i64 2, %i2
+ %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2
+ ret ptr %arrayidx
+}
+
+; Neither inbounds nor nuw can be preserved.
+define ptr @nuw_inbounds_multilevel_not_all_nuw(ptr %src, i64 %i1, i64 %i2) {
+; CHECK-LABEL: @nuw_inbounds_multilevel_not_all_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 24
+; CHECK-NEXT: ret ptr [[ARRAYIDX3]]
+;
+ %idx1 = add nuw i64 %i1, 1
+ %idx2 = add i64 2, %i2
+ %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2
+ ret ptr %arrayidx
+}
+
+; Missing information about non-extracted indices does not matter.
+define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded(ptr %src, i64 %i1, i64 %v) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[V:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+ %idx1 = add nuw i64 %i1, 1
+ %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %v
+ ret ptr %arrayidx
+}
+
+define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded(ptr %src, i64 %i1, i64 %v) {
+; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[V:%.*]], i64 [[I1:%.*]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
+;
+ %idx1 = add nuw i64 %i1, 1
+ %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %v, i64 %idx1
+ ret ptr %arrayidx
+}