[llvm] [InstCombine] Fold `getelementptr inbounds null, idx -> null` (PR #130742)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 10 22:02:08 PDT 2025


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/130742
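
For context, the reasoning behind the fold: in an address space where null is not a defined address, a `getelementptr inbounds` of a null base pointer either has an all-zero offset (and already evaluates to null) or steps outside of every allocated object (and is poison), so returning null is a valid refinement in both cases. A minimal before/after sketch in IR, matching the tests added to llvm/test/Transforms/InstSimplify/gep.ll below (the function name here is purely illustrative):

    define ptr @fold_example(i64 %idx) {
      %p = getelementptr inbounds i8, ptr null, i64 %idx
      ret ptr %p
    }

After instsimplify (e.g. opt -S -passes=instsimplify), the body simplifies to:

    define ptr @fold_example(i64 %idx) {
      ret ptr null
    }

Because the fold lives in InstructionSimplify, both -passes=instsimplify and -passes=instcombine pick it up. It does not fire for a plain (non-inbounds) GEP, nor when null is a valid address (null_pointer_is_valid functions, and conservatively non-zero address spaces); the gep_null_not_inbounds and gep_null_defined tests below cover the first two cases.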

From 22a081220f6857097c49c7f499afae89e3902c23 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 11 Mar 2025 17:44:06 +0800
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests. NFC.

---
 .../InstCombine/InstructionCombining.cpp      |  5 +++
 .../Transforms/InstCombine/getelementptr.ll   | 35 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/sub-gep.ll   |  4 +--
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 856e02c9f1ddb..25117bc14de8a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2911,6 +2911,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                           SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
+  // // getelementptr inbounds null, idx -> null
+  // if (auto *BaseC = dyn_cast<Constant>(PtrOp))
+  //   if (GEP.isInBounds() && BaseC->isNullValue() && !NullPointerIsDefined(GEP.getFunction(), GEPType->getPointerAddressSpace()))
+  //     return replaceInstUsesWith(GEP, PtrOp);
+
   // For vector geps, use the generic demanded vector support.
   // Skip if GEP return type is scalable. The number of elements is unknown at
   // compile-time.
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index ec03d9a2dae2b..6755a74940ee2 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -2019,5 +2019,40 @@ define ptr @gep_merge_nusw_const(ptr %p, i64 %idx, i64 %idx2) {
   ret ptr %gep
 }
 
+define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
+;
+  %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
+  ret <2 x ptr> %p
+}
+
+define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, <2 x i64> [[IDX:%.*]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
+;
+  %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
+  ret <2 x ptr> %p
+}
+
+define ptr @gep_noinbounds_null(i64 %idx) {
+; CHECK-LABEL: @gep_noinbounds_null(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[P]]
+;
+  %p = getelementptr i8, ptr null, i64 %idx
+  ret ptr %p
+}
+
+define ptr @gep_inbounds_null_null_is_valid(i64 %idx) null_pointer_is_valid {
+; CHECK-LABEL: @gep_inbounds_null_null_is_valid(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[P]]
+;
+  %p = getelementptr inbounds i8, ptr null, i64 %idx
+  ret ptr %p
+}
 
 !0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 3f8728d3a4381..c86a1a37bd7ad 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -741,7 +741,7 @@ define i64 @nullptrtoint_scalable_c() {
 ; CHECK-NEXT:    ret i64 [[PTR_IDX]]
 ;
 entry:
-  %ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 8
+  %ptr = getelementptr nusw <vscale x 4 x i32>, ptr null, i64 8
   %ret = ptrtoint ptr %ptr to i64
   ret i64 %ret
 }
@@ -755,7 +755,7 @@ define i64 @nullptrtoint_scalable_x(i64 %x) {
 ; CHECK-NEXT:    ret i64 [[PTR_IDX]]
 ;
 entry:
-  %ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 %x
+  %ptr = getelementptr nusw <vscale x 4 x i32>, ptr null, i64 %x
   %ret = ptrtoint ptr %ptr to i64
   ret i64 %ret
 }

From 471662ed2797fc6b3f3a08649095d66e84f150d2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 11 Apr 2025 13:01:39 +0800
Subject: [PATCH 2/2] [InstCombine] Fold `getelementptr inbounds null, idx ->
 null`

---
 llvm/lib/Analysis/InstructionSimplify.cpp     |   8 +
 .../InstCombine/InstructionCombining.cpp      |   5 -
 .../CodeGen/AMDGPU/memcpy-crash-issue63986.ll | 193 +++++++++---------
 .../Transforms/InstCombine/getelementptr.ll   |  72 -------
 llvm/test/Transforms/InstCombine/store.ll     |   3 +-
 llvm/test/Transforms/InstSimplify/gep.ll      |  59 ++++++
 6 files changed, 166 insertions(+), 174 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 718d272dd0ac7..8ffb471070d91 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5042,6 +5042,14 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
   if (Q.isUndefValue(Ptr))
     return UndefValue::get(GEPTy);
 
+  // getelementptr inbounds null, idx -> null
+  if (NW.isInBounds() && Q.IIQ.UseInstrInfo && Q.CxtI) {
+    if (auto *BaseC = dyn_cast<Constant>(Ptr))
+      if (BaseC->isNullValue() &&
+          !NullPointerIsDefined(Q.CxtI->getFunction(), AS))
+        return Constant::getNullValue(GEPTy);
+  }
+
   bool IsScalableVec =
       SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
         return isa<ScalableVectorType>(V->getType());
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 25117bc14de8a..856e02c9f1ddb 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2911,11 +2911,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                           SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
-  // // getelementptr inbounds null, idx -> null
-  // if (auto *BaseC = dyn_cast<Constant>(PtrOp))
-  //   if (GEP.isInBounds() && BaseC->isNullValue() && !NullPointerIsDefined(GEP.getFunction(), GEPType->getPointerAddressSpace()))
-  //     return replaceInstUsesWith(GEP, PtrOp);
-
   // For vector geps, use the generic demanded vector support.
   // Skip if GEP return type is scalable. The number of elements is unknown at
   // compile-time.
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 29d9164f95d99..4f9440c59a9f4 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -7,138 +7,141 @@ define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
 ; CHECK-LABEL: issue63986:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_lshlrev_b64 v[4:5], 6, v[2:3]
-; CHECK-NEXT:    v_mov_b32_e32 v6, s17
-; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s16, v4
-; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
+; CHECK-NEXT:    v_lshlrev_b64 v[8:9], 6, v[2:3]
+; CHECK-NEXT:    v_mov_b32_e32 v4, s17
+; CHECK-NEXT:    v_add_co_u32_e32 v10, vcc, s16, v8
+; CHECK-NEXT:    v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
+; CHECK-NEXT:  ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0
+; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
-; CHECK-NEXT:  .LBB0_1: ; %loop-memcpy-expansion
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  .LBB0_2: ; %loop-memcpy-expansion
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    v_mov_b32_e32 v7, s5
-; CHECK-NEXT:    v_mov_b32_e32 v6, s4
-; CHECK-NEXT:    flat_load_dwordx4 v[10:13], v[6:7]
-; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, s4, v8
+; CHECK-NEXT:    v_add_co_u32_e32 v12, vcc, s4, v10
 ; CHECK-NEXT:    s_add_u32 s4, s4, 16
+; CHECK-NEXT:    v_mov_b32_e32 v13, s5
 ; CHECK-NEXT:    s_addc_u32 s5, s5, 0
 ; CHECK-NEXT:    v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
-; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v9, v7, vcc
+; CHECK-NEXT:    v_addc_co_u32_e32 v13, vcc, v11, v13, vcc
 ; CHECK-NEXT:    s_and_b64 vcc, exec, s[6:7]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_dwordx4 v[6:7], v[10:13]
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_1
-; CHECK-NEXT:  ; %bb.2: ; %loop-memcpy-residual-header
-; CHECK-NEXT:    s_branch .LBB0_4
-; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_2
+; CHECK-NEXT:  ; %bb.3: ; %loop-memcpy-residual-header
 ; CHECK-NEXT:    s_branch .LBB0_5
-; CHECK-NEXT:  .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
-; CHECK-NEXT:    v_lshlrev_b64 v[6:7], 6, v[2:3]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_8
-; CHECK-NEXT:  .LBB0_5: ; %loop-memcpy-residual.preheader
-; CHECK-NEXT:    s_add_u32 s4, s16, 32
-; CHECK-NEXT:    s_addc_u32 s5, s17, 0
-; CHECK-NEXT:    v_mov_b32_e32 v3, s5
-; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, s4, v4
-; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; CHECK-NEXT:  ; %bb.4:
+; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT:    s_branch .LBB0_6
+; CHECK-NEXT:  .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
+; CHECK-NEXT:    v_lshlrev_b64 v[2:3], 6, v[2:3]
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_9
+; CHECK-NEXT:  .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    flat_load_ubyte v2, v[2:3]
+; CHECK-NEXT:    s_add_u32 s6, s16, 32
+; CHECK-NEXT:    s_addc_u32 s4, s17, 0
+; CHECK-NEXT:    v_mov_b32_e32 v4, s4
+; CHECK-NEXT:    v_add_co_u32_e32 v3, vcc, s6, v8
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
-; CHECK-NEXT:  ; %bb.6: ; %loop-memcpy-residual
-; CHECK-NEXT:    s_add_u32 s6, 32, s4
-; CHECK-NEXT:    s_addc_u32 s7, 0, s5
-; CHECK-NEXT:    v_mov_b32_e32 v6, s6
-; CHECK-NEXT:    v_mov_b32_e32 v7, s7
-; CHECK-NEXT:    flat_load_ubyte v10, v[6:7]
-; CHECK-NEXT:    v_mov_b32_e32 v7, s5
-; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, s4, v2
-; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
+; CHECK-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  ; %bb.7: ; %loop-memcpy-residual
+; CHECK-NEXT:    v_mov_b32_e32 v6, s5
+; CHECK-NEXT:    v_add_co_u32_e32 v5, vcc, s4, v3
 ; CHECK-NEXT:    s_add_u32 s4, s4, 1
+; CHECK-NEXT:    v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
 ; CHECK-NEXT:    s_addc_u32 s5, s5, 0
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_byte v[6:7], v10
-; CHECK-NEXT:  ; %bb.7:
-; CHECK-NEXT:    v_mov_b32_e32 v7, v5
-; CHECK-NEXT:    v_mov_b32_e32 v6, v4
-; CHECK-NEXT:  .LBB0_8: ; %post-loop-memcpy-expansion
-; CHECK-NEXT:    v_and_b32_e32 v2, 15, v0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_byte v[5:6], v2
+; CHECK-NEXT:  ; %bb.8:
+; CHECK-NEXT:    v_mov_b32_e32 v2, v8
+; CHECK-NEXT:    v_mov_b32_e32 v3, v9
+; CHECK-NEXT:  .LBB0_9: ; %post-loop-memcpy-expansion
+; CHECK-NEXT:    v_and_b32_e32 v6, 15, v0
 ; CHECK-NEXT:    v_and_b32_e32 v0, -16, v0
-; CHECK-NEXT:    v_add_co_u32_e32 v4, vcc, v6, v0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0
-; CHECK-NEXT:    v_addc_co_u32_e32 v5, vcc, v7, v1, vcc
+; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v0
+; CHECK-NEXT:    v_mov_b32_e32 v7, 0
+; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
 ; CHECK-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
-; CHECK-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
-; CHECK-NEXT:    v_mov_b32_e32 v6, s17
-; CHECK-NEXT:    v_add_co_u32_e32 v4, vcc, s16, v4
-; CHECK-NEXT:    v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
-; CHECK-NEXT:    s_branch .LBB0_11
-; CHECK-NEXT:  .LBB0_9: ; %Flow14
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
+; CHECK-NEXT:    v_mov_b32_e32 v4, s17
+; CHECK-NEXT:    v_mov_b32_e32 v8, 0
+; CHECK-NEXT:    v_add_co_u32_e32 v12, vcc, s16, v2
+; CHECK-NEXT:    v_mov_b32_e32 v9, 0
+; CHECK-NEXT:    v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
+; CHECK-NEXT:    s_branch .LBB0_12
+; CHECK-NEXT:  .LBB0_10: ; %Flow14
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], 0
-; CHECK-NEXT:  .LBB0_10: ; %Flow16
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:  .LBB0_11: ; %Flow16
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_19
-; CHECK-NEXT:  .LBB0_11: ; %while.cond
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_20
+; CHECK-NEXT:  .LBB0_12: ; %while.cond
 ; CHECK-NEXT:    ; =>This Loop Header: Depth=1
-; CHECK-NEXT:    ; Child Loop BB0_13 Depth 2
-; CHECK-NEXT:    ; Child Loop BB0_17 Depth 2
+; CHECK-NEXT:    ; Child Loop BB0_14 Depth 2
+; CHECK-NEXT:    ; Child Loop BB0_18 Depth 2
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_14
-; CHECK-NEXT:  ; %bb.12: ; %loop-memcpy-expansion2.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_15
+; CHECK-NEXT:  ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[10:11], 0
 ; CHECK-NEXT:    s_mov_b64 s[12:13], 0
-; CHECK-NEXT:  .LBB0_13: ; %loop-memcpy-expansion2
-; CHECK-NEXT:    ; Parent Loop BB0_11 Depth=1
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  .LBB0_14: ; %loop-memcpy-expansion2
+; CHECK-NEXT:    ; Parent Loop BB0_12 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    v_mov_b32_e32 v6, s12
-; CHECK-NEXT:    v_mov_b32_e32 v7, s13
-; CHECK-NEXT:    flat_load_dwordx4 v[10:13], v[6:7]
-; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, s12, v8
+; CHECK-NEXT:    v_mov_b32_e32 v15, s13
+; CHECK-NEXT:    v_add_co_u32_e32 v14, vcc, s12, v10
 ; CHECK-NEXT:    s_add_u32 s12, s12, 16
-; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v9, v7, vcc
+; CHECK-NEXT:    v_addc_co_u32_e32 v15, vcc, v11, v15, vcc
 ; CHECK-NEXT:    s_addc_u32 s13, s13, 0
 ; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[14:15], v[2:5]
 ; CHECK-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_dwordx4 v[6:7], v[10:13]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[10:11]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_13
-; CHECK-NEXT:  .LBB0_14: ; %Flow15
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_14
+; CHECK-NEXT:  .LBB0_15: ; %Flow15
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], -1
-; CHECK-NEXT:    s_cbranch_execz .LBB0_10
-; CHECK-NEXT:  ; %bb.15: ; %loop-memcpy-residual-header5
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_11
+; CHECK-NEXT:  ; %bb.16: ; %loop-memcpy-residual-header5
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 s[10:11], exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_9
-; CHECK-NEXT:  ; %bb.16: ; %loop-memcpy-residual4.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_10
+; CHECK-NEXT:  ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    flat_load_ubyte v2, v[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[12:13], 0
 ; CHECK-NEXT:    s_mov_b64 s[14:15], 0
-; CHECK-NEXT:  .LBB0_17: ; %loop-memcpy-residual4
-; CHECK-NEXT:    ; Parent Loop BB0_11 Depth=1
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  .LBB0_18: ; %loop-memcpy-residual4
+; CHECK-NEXT:    ; Parent Loop BB0_12 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    v_mov_b32_e32 v10, s15
-; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, s14, v0
-; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v1, v10, vcc
-; CHECK-NEXT:    flat_load_ubyte v11, v[6:7]
-; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, s14, v4
+; CHECK-NEXT:    v_add_co_u32_e32 v3, vcc, s14, v12
 ; CHECK-NEXT:    s_add_u32 s14, s14, 1
+; CHECK-NEXT:    v_mov_b32_e32 v4, s15
 ; CHECK-NEXT:    s_addc_u32 s15, s15, 0
-; CHECK-NEXT:    v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
-; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v5, v10, vcc
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7]
+; CHECK-NEXT:    v_addc_co_u32_e32 v4, vcc, v13, v4, vcc
 ; CHECK-NEXT:    s_or_b64 s[12:13], s[8:9], s[12:13]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_byte v[6:7], v11
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_byte v[3:4], v2
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_17
-; CHECK-NEXT:  ; %bb.18: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_11 Depth=1
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_18
+; CHECK-NEXT:  ; %bb.19: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_branch .LBB0_9
-; CHECK-NEXT:  .LBB0_19: ; %DummyReturnBlock
+; CHECK-NEXT:    s_branch .LBB0_10
+; CHECK-NEXT:  .LBB0_20: ; %DummyReturnBlock
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 entry:
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 6755a74940ee2..feba952919b9a 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1326,42 +1326,6 @@ define ptr @PR45084_extra_use(i1 %cond, ptr %p) {
   ret ptr %sel
 }
 
-define ptr @gep_null_inbounds(i64 %idx) {
-; CHECK-LABEL: @gep_null_inbounds(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
-;
-  %gep = getelementptr inbounds i8, ptr null, i64 %idx
-  ret ptr %gep
-}
-
-define ptr @gep_null_not_inbounds(i64 %idx) {
-; CHECK-LABEL: @gep_null_not_inbounds(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
-;
-  %gep = getelementptr i8, ptr null, i64 %idx
-  ret ptr %gep
-}
-
-define ptr @gep_null_defined(i64 %idx) null_pointer_is_valid {
-; CHECK-LABEL: @gep_null_defined(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
-;
-  %gep = getelementptr inbounds i8, ptr null, i64 %idx
-  ret ptr %gep
-}
-
-define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) {
-; CHECK-LABEL: @gep_null_inbounds_different_type(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [0 x i8], ptr null, i64 0, i64 [[IDX2:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
-;
-  %gep = getelementptr inbounds [0 x i8], ptr null, i64 %idx1, i64 %idx2
-  ret ptr %gep
-}
-
 define ptr @D98588(ptr %c1, i64 %offset) {
 ; CHECK-LABEL: @D98588(
 ; CHECK-NEXT:    [[C2_NEXT_IDX:%.*]] = shl nsw i64 [[OFFSET:%.*]], 3
@@ -2019,40 +1983,4 @@ define ptr @gep_merge_nusw_const(ptr %p, i64 %idx, i64 %idx2) {
   ret ptr %gep
 }
 
-define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
-; CHECK-LABEL: @gep_inbounds_null_vec(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret <2 x ptr> [[P]]
-;
-  %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
-  ret <2 x ptr> %p
-}
-
-define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
-; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, <2 x i64> [[IDX:%.*]]
-; CHECK-NEXT:    ret <2 x ptr> [[P]]
-;
-  %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
-  ret <2 x ptr> %p
-}
-
-define ptr @gep_noinbounds_null(i64 %idx) {
-; CHECK-LABEL: @gep_noinbounds_null(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[P]]
-;
-  %p = getelementptr i8, ptr null, i64 %idx
-  ret ptr %p
-}
-
-define ptr @gep_inbounds_null_null_is_valid(i64 %idx) null_pointer_is_valid {
-; CHECK-LABEL: @gep_inbounds_null_null_is_valid(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[P]]
-;
-  %p = getelementptr inbounds i8, ptr null, i64 %idx
-  ret ptr %p
-}
-
 !0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll
index daa40da1828b5..48c63c6f24c72 100644
--- a/llvm/test/Transforms/InstCombine/store.ll
+++ b/llvm/test/Transforms/InstCombine/store.ll
@@ -49,8 +49,7 @@ define void @test2(ptr %P) {
 
 define void @store_at_gep_off_null_inbounds(i64 %offset) {
 ; CHECK-LABEL: @store_at_gep_off_null_inbounds(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr null, i64 [[OFFSET:%.*]]
-; CHECK-NEXT:    store i32 poison, ptr [[PTR]], align 4
+; CHECK-NEXT:    store i32 poison, ptr null, align 4
 ; CHECK-NEXT:    ret void
 ;
   %ptr = getelementptr inbounds i32, ptr null, i64 %offset
diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
index a330f5cbc9268..a73c902fac647 100644
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -386,3 +386,62 @@ define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
   %diff = sub i64 %c2.int, %c1.int
   ret i64 %diff
 }
+
+define ptr @gep_null_inbounds(i64 %idx) {
+; CHECK-LABEL: @gep_null_inbounds(
+; CHECK-NEXT:    ret ptr null
+;
+  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  ret ptr %gep
+}
+
+define ptr @gep_null_not_inbounds(i64 %idx) {
+; CHECK-LABEL: @gep_null_not_inbounds(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %gep = getelementptr i8, ptr null, i64 %idx
+  ret ptr %gep
+}
+
+define ptr @gep_null_defined(i64 %idx) null_pointer_is_valid {
+; CHECK-LABEL: @gep_null_defined(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  ret ptr %gep
+}
+
+define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @gep_null_inbounds_different_type(
+; CHECK-NEXT:    ret ptr null
+;
+  %gep = getelementptr inbounds [0 x i8], ptr null, i64 %idx1, i64 %idx2
+  ret ptr %gep
+}
+
+define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec(
+; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
+;
+  %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
+  ret <2 x ptr> %p
+}
+
+define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
+; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
+;
+  %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
+  ret <2 x ptr> %p
+}
+
+define ptr @gep_noinbounds_null(i64 %idx) {
+; CHECK-LABEL: @gep_noinbounds_null(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[P]]
+;
+  %p = getelementptr i8, ptr null, i64 %idx
+  ret ptr %p
+}


