[llvm] [InstCombine] Fold `getelementptr inbounds null, idx -> null` (PR #130742)

Thu Apr 10 19:53:46 PDT 2025

https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/130742

>From c2501db1fed96605ce2ce67200ec3463547ee603 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 11 Mar 2025 17:44:06 +0800
Subject: [PATCH 1/4] [InstCombine] Add pre-commit tests. NFC.

---
 .../InstCombine/InstructionCombining.cpp      |  5 +++
 .../Transforms/InstCombine/getelementptr.ll   | 35 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/sub-gep.ll   |  4 +--
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 856e02c9f1ddb..25117bc14de8a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2911,6 +2911,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                           SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
+  // // getelementptr inbounds null, idx -> null
+  // if (auto *BaseC = dyn_cast<Constant>(PtrOp))
+  //   if (GEP.isInBounds() && BaseC->isNullValue() && !NullPointerIsDefined(GEP.getFunction(), GEPType->getPointerAddressSpace()))
+  //     return replaceInstUsesWith(GEP, PtrOp);
+
   // For vector geps, use the generic demanded vector support.
   // Skip if GEP return type is scalable. The number of elements is unknown at
   // compile-time.
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index ec03d9a2dae2b..6755a74940ee2 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -2019,5 +2019,40 @@ define ptr @gep_merge_nusw_const(ptr %p, i64 %idx, i64 %idx2) {
   ret ptr %gep
 }
 
+define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
+;
+  %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
+  ret <2 x ptr> %p
+}
+
+define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, <2 x i64> [[IDX:%.*]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
+;
+  %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
+  ret <2 x ptr> %p
+}
+
+define ptr @gep_noinbounds_null(i64 %idx) {
+; CHECK-LABEL: @gep_noinbounds_null(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[P]]
+;
+  %p = getelementptr i8, ptr null, i64 %idx
+  ret ptr %p
+}
+
+define ptr @gep_inbounds_null_null_is_valid(i64 %idx) null_pointer_is_valid {
+; CHECK-LABEL: @gep_inbounds_null_null_is_valid(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[P]]
+;
+  %p = getelementptr inbounds i8, ptr null, i64 %idx
+  ret ptr %p
+}
 
 !0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 3f8728d3a4381..c86a1a37bd7ad 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -741,7 +741,7 @@ define i64 @nullptrtoint_scalable_c() {
 ; CHECK-NEXT:    ret i64 [[PTR_IDX]]
 ;
 entry:
-  %ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 8
+  %ptr = getelementptr nusw <vscale x 4 x i32>, ptr null, i64 8
   %ret = ptrtoint ptr %ptr to i64
   ret i64 %ret
 }
@@ -755,7 +755,7 @@ define i64 @nullptrtoint_scalable_x(i64 %x) {
 ; CHECK-NEXT:    ret i64 [[PTR_IDX]]
 ;
 entry:
-  %ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 %x
+  %ptr = getelementptr nusw <vscale x 4 x i32>, ptr null, i64 %x
   %ret = ptrtoint ptr %ptr to i64
   ret i64 %ret
 }

>From bbc2574d4e0470370e1db3a7edb7f01982a9f0fb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 11 Mar 2025 18:09:06 +0800
Subject: [PATCH 2/4] [InstCombine] Fold `getelementptr inbounds null, idx ->
 null`

---
 .../Transforms/InstCombine/InstructionCombining.cpp  | 10 ++++++----
 llvm/test/Transforms/InstCombine/getelementptr.ll    | 12 ++++--------
 llvm/test/Transforms/InstCombine/store.ll            |  3 +--
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 25117bc14de8a..a583d58d7f88d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2911,10 +2911,12 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                           SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
-  // // getelementptr inbounds null, idx -> null
-  // if (auto *BaseC = dyn_cast<Constant>(PtrOp))
-  //   if (GEP.isInBounds() && BaseC->isNullValue() && !NullPointerIsDefined(GEP.getFunction(), GEPType->getPointerAddressSpace()))
-  //     return replaceInstUsesWith(GEP, PtrOp);
+  // getelementptr inbounds null, idx -> null
+  if (auto *BaseC = dyn_cast<Constant>(PtrOp))
+    if (GEP.isInBounds() && BaseC->isNullValue() &&
+        !NullPointerIsDefined(GEP.getFunction(),
+                              GEPType->getPointerAddressSpace()))
+      return replaceInstUsesWith(GEP, Constant::getNullValue(GEPType));
 
   // For vector geps, use the generic demanded vector support.
   // Skip if GEP return type is scalable. The number of elements is unknown at
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 6755a74940ee2..c1bd6806eae86 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1328,8 +1328,7 @@ define ptr @PR45084_extra_use(i1 %cond, ptr %p) {
 
 define ptr @gep_null_inbounds(i64 %idx) {
 ; CHECK-LABEL: @gep_null_inbounds(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
+; CHECK-NEXT:    ret ptr null
 ;
   %gep = getelementptr inbounds i8, ptr null, i64 %idx
   ret ptr %gep
@@ -1355,8 +1354,7 @@ define ptr @gep_null_defined(i64 %idx) null_pointer_is_valid {
 
 define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) {
 ; CHECK-LABEL: @gep_null_inbounds_different_type(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [0 x i8], ptr null, i64 0, i64 [[IDX2:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
+; CHECK-NEXT:    ret ptr null
 ;
   %gep = getelementptr inbounds [0 x i8], ptr null, i64 %idx1, i64 %idx2
   ret ptr %gep
@@ -2021,8 +2019,7 @@ define ptr @gep_merge_nusw_const(ptr %p, i64 %idx, i64 %idx2) {
 
 define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
 ; CHECK-LABEL: @gep_inbounds_null_vec(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret <2 x ptr> [[P]]
+; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
 ;
   %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
   ret <2 x ptr> %p
@@ -2030,8 +2027,7 @@ define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
 
 define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
 ; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, <2 x i64> [[IDX:%.*]]
-; CHECK-NEXT:    ret <2 x ptr> [[P]]
+; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
 ;
   %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
   ret <2 x ptr> %p
diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll
index daa40da1828b5..48c63c6f24c72 100644
--- a/llvm/test/Transforms/InstCombine/store.ll
+++ b/llvm/test/Transforms/InstCombine/store.ll
@@ -49,8 +49,7 @@ define void @test2(ptr %P) {
 
 define void @store_at_gep_off_null_inbounds(i64 %offset) {
 ; CHECK-LABEL: @store_at_gep_off_null_inbounds(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr null, i64 [[OFFSET:%.*]]
-; CHECK-NEXT:    store i32 poison, ptr [[PTR]], align 4
+; CHECK-NEXT:    store i32 poison, ptr null, align 4
 ; CHECK-NEXT:    ret void
 ;
   %ptr = getelementptr inbounds i32, ptr null, i64 %offset

>From 42f7808286735a05a086e15f9072aa84f50330aa Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 13 Mar 2025 01:03:46 +0800
Subject: [PATCH 3/4] [InstSimplify] Move fold into InstSimplify

---
 llvm/lib/Analysis/InstructionSimplify.cpp     |  8 +++
 .../InstCombine/InstructionCombining.cpp      |  7 --
 .../MemoryDependenceAnalysis/InvariantLoad.ll |  2 +-
 .../ValueTracking/gep-negative-issue.ll       |  2 +-
 llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll    |  2 +-
 .../CodeGen/AMDGPU/memcpy-crash-issue63986.ll |  2 +-
 ...ector-constainsundef-crash-inseltpoison.ll |  2 +-
 .../constexpr-vector-constainsundef-crash.ll  |  2 +-
 .../Transforms/InstCombine/getelementptr.ll   | 68 -------------------
 .../InstSimplify/ConstProp/cast-vector.ll     |  4 +-
 llvm/test/Transforms/InstSimplify/gep.ll      | 59 ++++++++++++++++
 11 files changed, 75 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 718d272dd0ac7..8ffb471070d91 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5042,6 +5042,14 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
   if (Q.isUndefValue(Ptr))
     return UndefValue::get(GEPTy);
 
+  // getelementptr inbounds null, idx -> null (if null is not a valid address)
+  if (NW.isInBounds() && Q.IIQ.UseInstrInfo && Q.CxtI) {
+    if (auto *BaseC = dyn_cast<Constant>(Ptr))
+      if (BaseC->isNullValue() &&
+          !NullPointerIsDefined(Q.CxtI->getFunction(), AS))
+        return Constant::getNullValue(GEPTy);
+  }
+
   bool IsScalableVec =
       SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
         return isa<ScalableVectorType>(V->getType());
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a583d58d7f88d..856e02c9f1ddb 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2911,13 +2911,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                           SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
-  // getelementptr inbounds null, idx -> null
-  if (auto *BaseC = dyn_cast<Constant>(PtrOp))
-    if (GEP.isInBounds() && BaseC->isNullValue() &&
-        !NullPointerIsDefined(GEP.getFunction(),
-                              GEPType->getPointerAddressSpace()))
-      return replaceInstUsesWith(GEP, Constant::getNullValue(GEPType));
-
   // For vector geps, use the generic demanded vector support.
   // Skip if GEP return type is scalable. The number of elements is unknown at
   // compile-time.
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
index 60c97b4c275a1..e49db3d8c3e8e 100644
--- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
+++ b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
@@ -135,7 +135,7 @@ alive:
 
 ; This is reduced test case catching regression in the first version of the
 ; fix for invariant loads (https://reviews.llvm.org/D64405).
-define void @test4() {
+define void @test4() null_pointer_is_valid {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr inttoptr (i64 8 to ptr), align 4
diff --git a/llvm/test/Analysis/ValueTracking/gep-negative-issue.ll b/llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
index 98b6fa7958f0c..ede5994ce5c3b 100644
--- a/llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
+++ b/llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
 %ArrayImpl = type { i64, ptr addrspace(100), [1 x i64], [1 x i64], [1 x i64], i64, i64, ptr addrspace(100), ptr addrspace(100), i8, i64 }
 %_array = type { i64, ptr addrspace(100), i8 }
 
-define void @test(i64 %n_chpl) {
+define void @test(i64 %n_chpl) null_pointer_is_valid {
 entry:
   ; First section is some code
   %0 = getelementptr inbounds %_array, ptr null, i32 0, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index c739ba2183ef9..1cd55fa8dac18 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -402,7 +402,7 @@ bb:
   ret <2 x half> %result
 }
 
-define <2 x half> @chain_hi_to_lo_flat() {
+define <2 x half> @chain_hi_to_lo_flat() null_pointer_is_valid {
 ; GCN-LABEL: chain_hi_to_lo_flat:
 ; GCN:       ; %bb.0: ; %bb
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 8157b1a7f7c80..18055121e3d5f 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -3,7 +3,7 @@
 
 %"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64] }
 
-define void @issue63986(i64 %0, i64 %idxprom) {
+define void @issue63986(i64 %0, i64 %idxprom) null_pointer_is_valid {
 ; CHECK-LABEL: issue63986:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash-inseltpoison.ll b/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash-inseltpoison.ll
index a40811aa1dde1..b1db5b0314ac6 100644
--- a/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash-inseltpoison.ll
+++ b/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash-inseltpoison.ll
@@ -5,7 +5,7 @@
 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=24278
 
 ; Make sure we do not crash when dealing with a vector constant expression.
-define <4 x ptr> @test(ptr %ptr) {
+define <4 x ptr> @test(ptr %ptr) null_pointer_is_valid {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L3:%.*]] = load i64, ptr [[PTR:%.*]], align 4
diff --git a/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash.ll b/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash.ll
index ddb223fffab5e..f676e81c114e8 100644
--- a/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash.ll
+++ b/llvm/test/Transforms/GVN/constexpr-vector-constainsundef-crash.ll
@@ -5,7 +5,7 @@
 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=24278
 
 ; Make sure we do not crash when dealing with a vector constant expression.
-define <4 x ptr> @test(ptr %ptr) {
+define <4 x ptr> @test(ptr %ptr) null_pointer_is_valid {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L3:%.*]] = load i64, ptr [[PTR:%.*]], align 4
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index c1bd6806eae86..feba952919b9a 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1326,40 +1326,6 @@ define ptr @PR45084_extra_use(i1 %cond, ptr %p) {
   ret ptr %sel
 }
 
-define ptr @gep_null_inbounds(i64 %idx) {
-; CHECK-LABEL: @gep_null_inbounds(
-; CHECK-NEXT:    ret ptr null
-;
-  %gep = getelementptr inbounds i8, ptr null, i64 %idx
-  ret ptr %gep
-}
-
-define ptr @gep_null_not_inbounds(i64 %idx) {
-; CHECK-LABEL: @gep_null_not_inbounds(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
-;
-  %gep = getelementptr i8, ptr null, i64 %idx
-  ret ptr %gep
-}
-
-define ptr @gep_null_defined(i64 %idx) null_pointer_is_valid {
-; CHECK-LABEL: @gep_null_defined(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[GEP]]
-;
-  %gep = getelementptr inbounds i8, ptr null, i64 %idx
-  ret ptr %gep
-}
-
-define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) {
-; CHECK-LABEL: @gep_null_inbounds_different_type(
-; CHECK-NEXT:    ret ptr null
-;
-  %gep = getelementptr inbounds [0 x i8], ptr null, i64 %idx1, i64 %idx2
-  ret ptr %gep
-}
-
 define ptr @D98588(ptr %c1, i64 %offset) {
 ; CHECK-LABEL: @D98588(
 ; CHECK-NEXT:    [[C2_NEXT_IDX:%.*]] = shl nsw i64 [[OFFSET:%.*]], 3
@@ -2017,38 +1983,4 @@ define ptr @gep_merge_nusw_const(ptr %p, i64 %idx, i64 %idx2) {
   ret ptr %gep
 }
 
-define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
-; CHECK-LABEL: @gep_inbounds_null_vec(
-; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
-;
-  %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
-  ret <2 x ptr> %p
-}
-
-define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
-; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
-; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
-;
-  %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
-  ret <2 x ptr> %p
-}
-
-define ptr @gep_noinbounds_null(i64 %idx) {
-; CHECK-LABEL: @gep_noinbounds_null(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[P]]
-;
-  %p = getelementptr i8, ptr null, i64 %idx
-  ret ptr %p
-}
-
-define ptr @gep_inbounds_null_null_is_valid(i64 %idx) null_pointer_is_valid {
-; CHECK-LABEL: @gep_inbounds_null_null_is_valid(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
-; CHECK-NEXT:    ret ptr [[P]]
-;
-  %p = getelementptr inbounds i8, ptr null, i64 %idx
-  ret ptr %p
-}
-
 !0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast-vector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast-vector.ll
index f42f4071ac239..ffd774ffbba0c 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/cast-vector.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast-vector.ll
@@ -5,7 +5,7 @@
 ; "offsetof-like expression" case).
 ; This used to hit an assert due to not supporting vectors in
 ; llvm::ConstantFoldCastInstruction when handling ptrtoint.
-define <2 x i16> @test1() {
+define <2 x i16> @test1() null_pointer_is_valid {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x i16> <i16 ptrtoint (ptr getelementptr inbounds ([10 x i32], ptr null, i64 0, i64 5) to i16), i16 ptrtoint (ptr getelementptr inbounds ([10 x i32], ptr null, i64 0, i64 7) to i16)>
@@ -20,7 +20,7 @@ entry:
 ; "sizeof-like expression" case).
 ; This used to hit an assert due to not supporting vectors in
 ; llvm::ConstantFoldCastInstruction when handling ptrtoint.
-define <2 x i16> @test2() {
+define <2 x i16> @test2() null_pointer_is_valid {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x i16> <i16 ptrtoint (ptr getelementptr (i32, ptr null, i64 5) to i16), i16 ptrtoint (ptr getelementptr (i32, ptr null, i64 7) to i16)>
diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
index a330f5cbc9268..a73c902fac647 100644
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -386,3 +386,62 @@ define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
   %diff = sub i64 %c2.int, %c1.int
   ret i64 %diff
 }
+
+define ptr @gep_null_inbounds(i64 %idx) {
+; CHECK-LABEL: @gep_null_inbounds(
+; CHECK-NEXT:    ret ptr null
+;
+  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  ret ptr %gep
+}
+
+define ptr @gep_null_not_inbounds(i64 %idx) {
+; CHECK-LABEL: @gep_null_not_inbounds(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %gep = getelementptr i8, ptr null, i64 %idx
+  ret ptr %gep
+}
+
+define ptr @gep_null_defined(i64 %idx) null_pointer_is_valid {
+; CHECK-LABEL: @gep_null_defined(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  ret ptr %gep
+}
+
+define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @gep_null_inbounds_different_type(
+; CHECK-NEXT:    ret ptr null
+;
+  %gep = getelementptr inbounds [0 x i8], ptr null, i64 %idx1, i64 %idx2
+  ret ptr %gep
+}
+
+define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec(
+; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
+;
+  %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx
+  ret <2 x ptr> %p
+}
+
+define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) {
+; CHECK-LABEL: @gep_inbounds_null_vec_broadcast(
+; CHECK-NEXT:    ret <2 x ptr> zeroinitializer
+;
+  %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx
+  ret <2 x ptr> %p
+}
+
+define ptr @gep_noinbounds_null(i64 %idx) {
+; CHECK-LABEL: @gep_noinbounds_null(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr null, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret ptr [[P]]
+;
+  %p = getelementptr i8, ptr null, i64 %idx
+  ret ptr %p
+}

>From 18701090c4854c87370e1bbc66783005bbb7cd06 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 13 Mar 2025 09:34:23 +0800
Subject: [PATCH 4/4] [AMDGPU] Fix tests. NFC.

---
 llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll    |  92 ++++++---
 .../CodeGen/AMDGPU/memcpy-crash-issue63986.ll | 189 +++++++++---------
 2 files changed, 160 insertions(+), 121 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 1cd55fa8dac18..9f48c8b5fe49c 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -402,39 +402,68 @@ bb:
   ret <2 x half> %result
 }
 
-define <2 x half> @chain_hi_to_lo_flat() null_pointer_is_valid {
-; GCN-LABEL: chain_hi_to_lo_flat:
-; GCN:       ; %bb.0: ; %bb
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v0, 2
-; GCN-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-NEXT:    flat_load_ushort v0, v[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    flat_load_short_d16_hi v0, v[1:2]
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+define <2 x half> @chain_hi_to_lo_flat(ptr inreg %ptr) {
+; GFX900-LABEL: chain_hi_to_lo_flat:
+; GFX900:       ; %bb.0: ; %bb
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v1, s17
+; GFX900-NEXT:    flat_load_ushort v0, v[0:1] offset:2
+; GFX900-NEXT:    v_mov_b32_e32 v1, 0
+; GFX900-NEXT:    v_mov_b32_e32 v2, 0
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    flat_load_short_d16_hi v0, v[1:2]
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: chain_hi_to_lo_flat:
-; GFX10:       ; %bb.0: ; %bb
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v0, 2
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    flat_load_ushort v0, v[0:1]
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    flat_load_short_d16_hi v0, v[1:2]
-; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; FLATSCR-LABEL: chain_hi_to_lo_flat:
+; FLATSCR:       ; %bb.0: ; %bb
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    v_mov_b32_e32 v0, s0
+; FLATSCR-NEXT:    v_mov_b32_e32 v1, s1
+; FLATSCR-NEXT:    flat_load_ushort v0, v[0:1] offset:2
+; FLATSCR-NEXT:    v_mov_b32_e32 v1, 0
+; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    flat_load_short_d16_hi v0, v[1:2]
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10_DEFAULT-LABEL: chain_hi_to_lo_flat:
+; GFX10_DEFAULT:       ; %bb.0: ; %bb
+; GFX10_DEFAULT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_DEFAULT-NEXT:    s_add_u32 s4, s16, 2
+; GFX10_DEFAULT-NEXT:    s_addc_u32 s5, s17, 0
+; GFX10_DEFAULT-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10_DEFAULT-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10_DEFAULT-NEXT:    flat_load_ushort v0, v[0:1]
+; GFX10_DEFAULT-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10_DEFAULT-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10_DEFAULT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10_DEFAULT-NEXT:    flat_load_short_d16_hi v0, v[1:2]
+; GFX10_DEFAULT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10_DEFAULT-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR_GFX10-LABEL: chain_hi_to_lo_flat:
+; FLATSCR_GFX10:       ; %bb.0: ; %bb
+; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR_GFX10-NEXT:    s_add_u32 s0, s0, 2
+; FLATSCR_GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v1, s1
+; FLATSCR_GFX10-NEXT:    flat_load_ushort v0, v[0:1]
+; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; FLATSCR_GFX10-NEXT:    flat_load_short_d16_hi v0, v[1:2]
+; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; FLATSCR_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat:
 ; GFX11-TRUE16:       ; %bb.0: ; %bb
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, 2
-; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11-TRUE16-NEXT:    flat_load_d16_b16 v0, v[0:1]
+; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-TRUE16-NEXT:    flat_load_d16_b16 v0, v[0:1] offset:2
 ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -445,9 +474,8 @@ define <2 x half> @chain_hi_to_lo_flat() null_pointer_is_valid {
 ; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat:
 ; GFX11-FAKE16:       ; %bb.0: ; %bb
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 2
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11-FAKE16-NEXT:    flat_load_u16 v0, v[0:1]
+; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-FAKE16-NEXT:    flat_load_u16 v0, v[0:1] offset:2
 ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -455,7 +483,7 @@ define <2 x half> @chain_hi_to_lo_flat() null_pointer_is_valid {
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 bb:
-  %gep_lo = getelementptr inbounds half, ptr null, i64 1
+  %gep_lo = getelementptr inbounds half, ptr %ptr, i64 1
   %load_lo = load half, ptr %gep_lo
   %load_hi = load half, ptr null
 
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 18055121e3d5f..4f9440c59a9f4 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -3,138 +3,149 @@
 
 %"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64] }
 
-define void @issue63986(i64 %0, i64 %idxprom) null_pointer_is_valid {
+define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
 ; CHECK-LABEL: issue63986:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_lshlrev_b64 v[4:5], 6, v[2:3]
+; CHECK-NEXT:    v_lshlrev_b64 v[8:9], 6, v[2:3]
+; CHECK-NEXT:    v_mov_b32_e32 v4, s17
+; CHECK-NEXT:    v_add_co_u32_e32 v10, vcc, s16, v8
+; CHECK-NEXT:    v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
+; CHECK-NEXT:  ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0
+; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
-; CHECK-NEXT:  .LBB0_1: ; %loop-memcpy-expansion
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  .LBB0_2: ; %loop-memcpy-expansion
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    v_mov_b32_e32 v7, s5
-; CHECK-NEXT:    v_mov_b32_e32 v6, s4
-; CHECK-NEXT:    flat_load_dwordx4 v[6:9], v[6:7]
-; CHECK-NEXT:    v_add_co_u32_e32 v10, vcc, s4, v4
+; CHECK-NEXT:    v_add_co_u32_e32 v12, vcc, s4, v10
 ; CHECK-NEXT:    s_add_u32 s4, s4, 16
-; CHECK-NEXT:    v_mov_b32_e32 v11, s5
+; CHECK-NEXT:    v_mov_b32_e32 v13, s5
 ; CHECK-NEXT:    s_addc_u32 s5, s5, 0
 ; CHECK-NEXT:    v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
-; CHECK-NEXT:    v_addc_co_u32_e32 v11, vcc, v5, v11, vcc
+; CHECK-NEXT:    v_addc_co_u32_e32 v13, vcc, v11, v13, vcc
 ; CHECK-NEXT:    s_and_b64 vcc, exec, s[6:7]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_dwordx4 v[10:11], v[6:9]
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_1
-; CHECK-NEXT:  ; %bb.2: ; %loop-memcpy-residual-header
-; CHECK-NEXT:    s_branch .LBB0_4
-; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_2
+; CHECK-NEXT:  ; %bb.3: ; %loop-memcpy-residual-header
 ; CHECK-NEXT:    s_branch .LBB0_5
-; CHECK-NEXT:  .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
-; CHECK-NEXT:    v_lshlrev_b64 v[6:7], 6, v[2:3]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_7
-; CHECK-NEXT:  .LBB0_5: ; %loop-memcpy-residual.preheader
-; CHECK-NEXT:    v_or_b32_e32 v2, 32, v4
-; CHECK-NEXT:    v_mov_b32_e32 v3, v5
+; CHECK-NEXT:  ; %bb.4:
+; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT:    s_branch .LBB0_6
+; CHECK-NEXT:  .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
+; CHECK-NEXT:    v_lshlrev_b64 v[2:3], 6, v[2:3]
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_9
+; CHECK-NEXT:  .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    flat_load_ubyte v2, v[2:3]
+; CHECK-NEXT:    s_add_u32 s6, s16, 32
+; CHECK-NEXT:    s_addc_u32 s4, s17, 0
+; CHECK-NEXT:    v_mov_b32_e32 v4, s4
+; CHECK-NEXT:    v_add_co_u32_e32 v3, vcc, s6, v8
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
-; CHECK-NEXT:  ; %bb.6: ; %loop-memcpy-residual
-; CHECK-NEXT:    s_add_u32 s6, 32, s4
-; CHECK-NEXT:    s_addc_u32 s7, 0, s5
-; CHECK-NEXT:    v_mov_b32_e32 v6, s6
-; CHECK-NEXT:    v_mov_b32_e32 v7, s7
-; CHECK-NEXT:    flat_load_ubyte v10, v[6:7]
-; CHECK-NEXT:    v_mov_b32_e32 v9, s5
-; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s4, v2
-; CHECK-NEXT:    v_mov_b32_e32 v7, v5
-; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v3, v9, vcc
+; CHECK-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  ; %bb.7: ; %loop-memcpy-residual
+; CHECK-NEXT:    v_mov_b32_e32 v6, s5
+; CHECK-NEXT:    v_add_co_u32_e32 v5, vcc, s4, v3
 ; CHECK-NEXT:    s_add_u32 s4, s4, 1
-; CHECK-NEXT:    v_mov_b32_e32 v6, v4
+; CHECK-NEXT:    v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
 ; CHECK-NEXT:    s_addc_u32 s5, s5, 0
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_byte v[8:9], v10
-; CHECK-NEXT:  .LBB0_7: ; %post-loop-memcpy-expansion
-; CHECK-NEXT:    v_and_b32_e32 v2, 15, v0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_byte v[5:6], v2
+; CHECK-NEXT:  ; %bb.8:
+; CHECK-NEXT:    v_mov_b32_e32 v2, v8
+; CHECK-NEXT:    v_mov_b32_e32 v3, v9
+; CHECK-NEXT:  .LBB0_9: ; %post-loop-memcpy-expansion
+; CHECK-NEXT:    v_and_b32_e32 v6, 15, v0
 ; CHECK-NEXT:    v_and_b32_e32 v0, -16, v0
+; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v0
+; CHECK-NEXT:    v_mov_b32_e32 v7, 0
+; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
 ; CHECK-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
-; CHECK-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
-; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v0
-; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v1, vcc
-; CHECK-NEXT:    s_branch .LBB0_10
-; CHECK-NEXT:  .LBB0_8: ; %Flow14
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
+; CHECK-NEXT:    v_mov_b32_e32 v4, s17
+; CHECK-NEXT:    v_mov_b32_e32 v8, 0
+; CHECK-NEXT:    v_add_co_u32_e32 v12, vcc, s16, v2
+; CHECK-NEXT:    v_mov_b32_e32 v9, 0
+; CHECK-NEXT:    v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
+; CHECK-NEXT:    s_branch .LBB0_12
+; CHECK-NEXT:  .LBB0_10: ; %Flow14
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], 0
-; CHECK-NEXT:  .LBB0_9: ; %Flow16
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:  .LBB0_11: ; %Flow16
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_18
-; CHECK-NEXT:  .LBB0_10: ; %while.cond
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_20
+; CHECK-NEXT:  .LBB0_12: ; %while.cond
 ; CHECK-NEXT:    ; =>This Loop Header: Depth=1
-; CHECK-NEXT:    ; Child Loop BB0_12 Depth 2
-; CHECK-NEXT:    ; Child Loop BB0_16 Depth 2
+; CHECK-NEXT:    ; Child Loop BB0_14 Depth 2
+; CHECK-NEXT:    ; Child Loop BB0_18 Depth 2
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_13
-; CHECK-NEXT:  ; %bb.11: ; %loop-memcpy-expansion2.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_15
+; CHECK-NEXT:  ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[10:11], 0
 ; CHECK-NEXT:    s_mov_b64 s[12:13], 0
-; CHECK-NEXT:  .LBB0_12: ; %loop-memcpy-expansion2
-; CHECK-NEXT:    ; Parent Loop BB0_10 Depth=1
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  .LBB0_14: ; %loop-memcpy-expansion2
+; CHECK-NEXT:    ; Parent Loop BB0_12 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    v_mov_b32_e32 v8, s12
-; CHECK-NEXT:    v_mov_b32_e32 v9, s13
-; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
-; CHECK-NEXT:    v_mov_b32_e32 v13, s13
-; CHECK-NEXT:    v_add_co_u32_e32 v12, vcc, s12, v4
+; CHECK-NEXT:    v_mov_b32_e32 v15, s13
+; CHECK-NEXT:    v_add_co_u32_e32 v14, vcc, s12, v10
 ; CHECK-NEXT:    s_add_u32 s12, s12, 16
-; CHECK-NEXT:    v_addc_co_u32_e32 v13, vcc, v5, v13, vcc
+; CHECK-NEXT:    v_addc_co_u32_e32 v15, vcc, v11, v15, vcc
 ; CHECK-NEXT:    s_addc_u32 s13, s13, 0
 ; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[14:15], v[2:5]
 ; CHECK-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[10:11]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_12
-; CHECK-NEXT:  .LBB0_13: ; %Flow15
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_14
+; CHECK-NEXT:  .LBB0_15: ; %Flow15
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], -1
-; CHECK-NEXT:    s_cbranch_execz .LBB0_9
-; CHECK-NEXT:  ; %bb.14: ; %loop-memcpy-residual-header5
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_11
+; CHECK-NEXT:  ; %bb.16: ; %loop-memcpy-residual-header5
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 s[10:11], exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_8
-; CHECK-NEXT:  ; %bb.15: ; %loop-memcpy-residual4.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_10
+; CHECK-NEXT:  ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    flat_load_ubyte v2, v[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[12:13], 0
 ; CHECK-NEXT:    s_mov_b64 s[14:15], 0
-; CHECK-NEXT:  .LBB0_16: ; %loop-memcpy-residual4
-; CHECK-NEXT:    ; Parent Loop BB0_10 Depth=1
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:  .LBB0_18: ; %loop-memcpy-residual4
+; CHECK-NEXT:    ; Parent Loop BB0_12 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    v_mov_b32_e32 v10, s15
-; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s14, v0
-; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v1, v10, vcc
-; CHECK-NEXT:    flat_load_ubyte v11, v[8:9]
-; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s14, v6
+; CHECK-NEXT:    v_add_co_u32_e32 v3, vcc, s14, v12
 ; CHECK-NEXT:    s_add_u32 s14, s14, 1
+; CHECK-NEXT:    v_mov_b32_e32 v4, s15
 ; CHECK-NEXT:    s_addc_u32 s15, s15, 0
-; CHECK-NEXT:    v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
-; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v7, v10, vcc
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7]
+; CHECK-NEXT:    v_addc_co_u32_e32 v4, vcc, v13, v4, vcc
 ; CHECK-NEXT:    s_or_b64 s[12:13], s[8:9], s[12:13]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_store_byte v[8:9], v11
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    flat_store_byte v[3:4], v2
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_16
-; CHECK-NEXT:  ; %bb.17: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_18
+; CHECK-NEXT:  ; %bb.19: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_branch .LBB0_8
-; CHECK-NEXT:  .LBB0_18: ; %DummyReturnBlock
+; CHECK-NEXT:    s_branch .LBB0_10
+; CHECK-NEXT:  .LBB0_20: ; %DummyReturnBlock
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr null, i64 0, i64 %idxprom
+  %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr %ptr, i64 0, i64 %idxprom
   %spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
   tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %spec.select, i1 false)
   br label %while.cond