[llvm] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking (PR #130617)

Fabian Ritter via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 21 06:35:50 PDT 2025


https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/130617

From 5d4946dfcda1a46f88541c4586214e41b4eccf80 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH 1/3] [SeparateConstOffsetFromGEP] Preserve inbounds flag based
 on ValueTracking

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.
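
As an illustration, here is a minimal sketch distilled from the must_be_inbounds case in the preserve-inbounds.ll test added below (value names are only for exposition): the pass splits a GEP whose index has a constant addend into a variadic-base GEP plus a constant-offset GEP, and with this change both halves keep the inbounds flag when the remaining index and the accumulated byte offset are known non-negative.

  ; before
  %i.prom = zext i32 %i to i64
  %idx = add nsw i64 %i.prom, 1
  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx

  ; after: inbounds is preserved because %i.prom (a zext) is known
  ; non-negative and the extracted byte offset (4) is non-negative
  %i.prom = zext i32 %i to i64
  %0 = getelementptr inbounds i32, ptr %p, i64 %i.prom
  %arrayidx2 = getelementptr inbounds i8, ptr %0, i64 4

Previously both resulting GEPs dropped the inbounds flag unconditionally.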
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp     | 18 +++++++++++----
 .../AMDGPU/preserve-inbounds.ll               | 23 +++++++++++++++++++
 .../NVPTX/split-gep-and-gvn.ll                | 16 ++++++-------
 .../NVPTX/split-gep.ll                        |  8 +++----
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index ab8e979e7b40a..7f93115499bc9 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
     }
   }
 
+  bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
   // Remove the constant offset in each sequential index. The resultant GEP
   // computes the variadic base.
   // Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
         // and the old index if they are not used.
         RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
         RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+        MayRecoverInbounds =
+            MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
       }
     }
   }
@@ -1100,11 +1104,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // address with silently-wrapping two's complement arithmetic".
   // Therefore, the final code will be a semantically equivalent.
   //
-  // TODO(jingyue): do some range analysis to keep as many inbounds as
-  // possible. GEPs with inbounds are more friendly to alias analysis.
-  // TODO(gep_nowrap): Preserve nuw at least.
-  GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
-  GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+  // If the initial GEP was inbounds and all variable indices and the
+  // accumulated offsets are non-negative, they can be added in any order and
+  // the intermediate results are in bounds. So, we can preserve the inbounds
+  // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis.
+  //
+  // TODO(gep_nowrap): Preserve nuw?
+  GEPNoWrapFlags NewGEPFlags =
+      MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+  GEP->setNoWrapFlags(NewGEPFlags);
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 422e5d8215502..01619aa481ddd 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -16,3 +16,26 @@ entry:
   %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
   ret ptr %arrayidx
 }
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
+; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i.prom = zext i32 %i to i64
+  %idx = add nsw i64 %i.prom, 1
+  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+  %3 = load float, ptr %arrayidx.src, align 4
+  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+  store float %3, ptr %arrayidx.dst, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..4474585bf9b06 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT:  .preheader:
 ; IR-NEXT:    [[TMP0:%.*]] = zext i32 [[Y]] to i64
 ; IR-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT:    [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
 ; IR-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
 ; IR-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
 ; IR-NEXT:    [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
 ; IR-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
 ; IR-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; IR-NEXT:    [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
 ; IR-NEXT:    [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
 ; IR-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
 ; IR-NEXT:    [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
 ; IR-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
 ; IR-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; IR-NEXT:    [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
@@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT:  .preheader:
 ; IR-NEXT:    [[TMP0:%.*]] = zext i32 [[Y]] to i64
 ; IR-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT:    [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
 ; IR-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
 ; IR-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
 ; IR-NEXT:    [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
 ; IR-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
 ; IR-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; IR-NEXT:    [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
 ; IR-NEXT:    [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
 ; IR-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
 ; IR-NEXT:    [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
 ; IR-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
 ; IR-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; IR-NEXT:    [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 77b3434f4f159..da04a6e979425 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) {
 ; CHECK-LABEL: define ptr @trunk_explicit(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
 ; CHECK-NEXT:    ret ptr [[PTR21]]
 ;
 entry:
@@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) {
 ; CHECK-LABEL: define ptr @trunk_long_idx(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
 ; CHECK-NEXT:    ret ptr [[PTR21]]
 ;
 entry:

From 7af5dfd84930aadeb21d5e89cef77bfdf83a2f46 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 11 Mar 2025 10:05:26 -0400
Subject: [PATCH 2/3] Simplify test case and add more

---
 .../AMDGPU/preserve-inbounds.ll               | 126 ++++++++++++++++--
 1 file changed, 112 insertions(+), 14 deletions(-)

diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 01619aa481ddd..6865fcf663c58 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -17,25 +17,123 @@ entry:
   ret ptr %arrayidx
 }
 
-; All offsets must be positive, so inbounds can be preserved.
-define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; All indices must be non-negative, so inbounds can be preserved.
+define ptr @must_be_inbounds(ptr %p, i32 %i) {
 ; CHECK-LABEL: @must_be_inbounds(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
-; CHECK-NEXT:    [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
-; CHECK-NEXT:    [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
-; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
 ;
 entry:
   %i.prom = zext i32 %i to i64
   %idx = add nsw i64 %i.prom, 1
-  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
-  %3 = load float, ptr %arrayidx.src, align 4
-  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
-  store float %3, ptr %arrayidx.dst, align 4
-  ret void
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
+  ret ptr %arrayidx
+}
+
+; idx must be non-negative -> preserve inbounds
+define ptr @sign_bit_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @sign_bit_clear(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX:%.*]] = and i64 [[I:%.*]], 9223372036854775807
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX]]
+;
+entry:
+  %idx = and i64 %i, u0x7fffffffffffffff
+  %idx.add = add i64 %idx, 1
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+  ret ptr %arrayidx
+}
+
+; idx may be negative -> don't preserve inbounds
+define ptr @sign_bit_not_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @sign_bit_not_clear(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX:%.*]] = and i64 [[I:%.*]], -256
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
+;
+entry:
+  %idx = and i64 %i, u0xffffffffffffff00
+  %idx.add = add i64 %idx, 1
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+  ret ptr %arrayidx
+}
+
+; idx may be 0 or very negative -> don't preserve inbounds
+define ptr @only_sign_bit_not_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @only_sign_bit_not_clear(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX:%.*]] = and i64 [[I:%.*]], -9223372036854775808
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
+;
+entry:
+  %idx = and i64 %i, u0x8000000000000000
+  %idx.add = add i64 %idx, 1
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+  ret ptr %arrayidx
+}
+
+; all indices non-negative -> preserve inbounds
+define ptr @multi_level_nonnegative(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_nonnegative(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255
+; CHECK-NEXT:    [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 180
+; CHECK-NEXT:    ret ptr [[ARRAYIDX3]]
+;
+entry:
+  %masked.idx1 = and i64 %idx1, u0xff
+  %masked.idx2 = and i64 %idx2, u0xffff
+  %idx1.add = add i64 %masked.idx1, 2
+  %idx2.add = add i64 %masked.idx2, 5
+  %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+  ret ptr %arrayidx
+}
+
+; It doesn't matter that %idx2.add might be negative, the indices in the resulting GEPs are all non-negative -> preserve inbounds
+define ptr @multi_level_mixed_okay(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_mixed_okay(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255
+; CHECK-NEXT:    [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 156
+; CHECK-NEXT:    ret ptr [[ARRAYIDX3]]
+;
+entry:
+  %masked.idx1 = and i64 %idx1, u0xff
+  %masked.idx2 = and i64 %idx2, u0xffff
+  %idx1.add = add i64 %masked.idx1, 2
+  %idx2.add = add i64 %masked.idx2, -1
+  %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+  ret ptr %arrayidx
+}
+
+; One index may be negative -> don't preserve inbounds
+define ptr @multi_level_mixed_not_okay(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_mixed_not_okay(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], -256
+; CHECK-NEXT:    [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 156
+; CHECK-NEXT:    ret ptr [[ARRAYIDX3]]
+;
+entry:
+  %masked.idx1 = and i64 %idx1, u0xffffffffffffff00
+  %masked.idx2 = and i64 %idx2, u0xffff
+  %idx1.add = add i64 %masked.idx1, 2
+  %idx2.add = add i64 %masked.idx2, -1
+  %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+  ret ptr %arrayidx
 }

From d467f0def6ff9f8c0ab23a80be24271b84f5ed37 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Fri, 21 Mar 2025 09:33:18 -0400
Subject: [PATCH 3/3] Use isKnownNonNegative, add available information to the
 SimplifyQuery

---
 llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 7f93115499bc9..54459fd08d94c 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1082,7 +1082,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
         RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
         RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
         MayRecoverInbounds =
-            MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
+            MayRecoverInbounds &&
+            isKnownNonNegative(
+                NewIdx,
+                SimplifyQuery(*DL, TLI, DT, /*AC=*/nullptr, /*CXTI=*/GEP));
       }
     }
   }


