[llvm] [GISel] Enforce G_PTR_ADD RHS type matching index size for addr space (PR #84352)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 8 05:52:37 PST 2024


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/84352

>From 4c7b4b6f1ac4a59b874c9bfde9ad501df6de0ffb Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 7 Mar 2024 17:56:54 +0000
Subject: [PATCH 1/5] [GISel] Enforce G_PTR_ADD RHS type matching index size
 for addr space
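
The verifier change: G_PTR_ADD now requires the offset (RHS) scalar width to
equal the DataLayout index size of the pointer's address space. A minimal
sketch of the check, assuming PtrTy and OffsetTy are the LLTs of the pointer
and offset operands and MF is the enclosing MachineFunction (names here are
illustrative, not part of the patch):

    // Offset width required for this address space.
    const DataLayout &DL = MF->getDataLayout();
    unsigned IndexSizeInBits = DL.getIndexSizeInBits(PtrTy.getAddressSpace());
    // Vectors of pointers are handled too, since only element widths are compared.
    bool Valid = OffsetTy.getScalarSizeInBits() == IndexSizeInBits;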

---
 llvm/lib/CodeGen/MachineVerifier.cpp         | 8 ++++++++
 llvm/test/MachineVerifier/test_g_ptr_add.mir | 6 +++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 1d0757c5d7f5f5..c29daf7e68a643 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1301,6 +1301,14 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
     if (OffsetTy.isPointerOrPointerVector())
       report("gep offset operand must not be a pointer", MI);
 
+    if (PtrTy.isPointerOrPointerVector()) {
+      const DataLayout &DL = MF->getDataLayout();
+      unsigned AS = PtrTy.getAddressSpace();
+      unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
+      if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits)
+        report("gep offset operand must match index size for address space", MI);
+    }
+
     // TODO: Is the offset allowed to be a scalar with a vector?
     break;
   }
diff --git a/llvm/test/MachineVerifier/test_g_ptr_add.mir b/llvm/test/MachineVerifier/test_g_ptr_add.mir
index 07fe6266701d5c..7d1373586c8eb3 100644
--- a/llvm/test/MachineVerifier/test_g_ptr_add.mir
+++ b/llvm/test/MachineVerifier/test_g_ptr_add.mir
@@ -1,4 +1,4 @@
-#RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
 # REQUIRES: aarch64-registered-target
 
 ---
@@ -29,4 +29,8 @@ body:             |
     ; CHECK: Bad machine code: gep first operand must be a pointer
     %6:_(s64) = G_PTR_ADD %1, %1
 
+    %7:_(s32) = G_IMPLICIT_DEF
+
+    ; CHECK: Bad machine code: gep offset operand must match index size for address space
+    %8:_(p0) = G_PTR_ADD %0, %7
 ...

>From 66a04e5b503d6fe85bb97392a6feb95a19679b5a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 8 Mar 2024 09:39:19 +0000
Subject: [PATCH 2/5] Fix handwritten MIR tests
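
Hand-written MIR tests that used an offset type narrower or wider than the
target's index size would now fail the verifier, so they are updated or
dropped: AArch64 and x86-64 p0 offsets become s64, the i386 cases keep s32
(hence the split of the X86 legalize-ptr-add test into -32 and -64 variants),
and tests that existed only to exercise mismatched offset widths are removed.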

---
 .../GlobalISel/combine-ptradd-int2ptr.mir     |  10 +-
 .../AArch64/GlobalISel/legalize-ptr-add.mir   |  17 --
 .../prelegalizer-combiner-load-or-pattern.mir | 226 +++++++++---------
 .../AMDGPU/GlobalISel/legalize-ptr-add.mir    | 207 ----------------
 .../GlobalISel/arm-legalize-load-store.mir    |  28 ---
 .../X86/GlobalISel/legalize-ptr-add-32.mir    |  55 +++++
 .../X86/GlobalISel/legalize-ptr-add-64.mir    |  55 +++++
 .../X86/GlobalISel/legalize-ptr-add.mir       | 224 -----------------
 .../X86/GlobalISel/regbankselect-X86_64.mir   |  19 +-
 9 files changed, 235 insertions(+), 606 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
 create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
index 40e5e8ebb7731e..1233a0af424533 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
@@ -11,7 +11,7 @@ body:             |
     ; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
     ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[C]](p64)
     ; CHECK: $x0 = COPY [[PTRTOINT]](s64)
-    %1:_(s32) = G_CONSTANT i32 42
+    %1:_(s64) = G_CONSTANT i64 42
     %2:_(s32) = G_CONSTANT i32 2
     %3:_(p64) = G_INTTOPTR %2
     %4:_(p64) = G_PTR_ADD %3, %1
@@ -26,7 +26,7 @@ body:             |
     ; CHECK-LABEL: name: agc.test_combine_ptradd_constants_ptrres
     ; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
     ; CHECK: $x0 = COPY [[C]](p64)
-    %1:_(s32) = G_CONSTANT i32 42
+    %1:_(s64) = G_CONSTANT i64 42
     %2:_(s32) = G_CONSTANT i32 2
     %3:_(p64) = G_INTTOPTR %2
     %4:_(p64) = G_PTR_ADD %3, %1
@@ -39,12 +39,12 @@ body:             |
     liveins: $x0, $x1
     ; Ensure non-constant G_PTR_ADDs are not folded.
     ; CHECK-LABEL: name: agc.test_not_combine_variable_ptradd
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
     ; CHECK: [[COPY:%[0-9]+]]:_(p64) = COPY $x1
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p64)
     ; CHECK: $x0 = COPY [[PTRTOINT]](s64)
-    %1:_(s32) = G_CONSTANT i32 42
+    %1:_(s64) = G_CONSTANT i64 42
     %2:_(p64) = COPY $x1
     %3:_(p64) = G_PTR_ADD %2, %1
     %4:_(s64) = G_PTRTOINT %3
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
index 7bd9725d0fc87d..1ecd36b55380a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
@@ -1,23 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
 ---
-name:            test_ptr_add_small
-body:             |
-  bb.0.entry:
-    ; CHECK-LABEL: name: test_ptr_add_small
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 8
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
-    ; CHECK: $x0 = COPY [[PTR_ADD]](p0)
-    %0:_(p0) = COPY $x0
-    %1:_(s64) = COPY $x1
-    %2:_(s8) = G_TRUNC %1(s64)
-    %3:_(p0) = G_PTR_ADD %0, %2(s8)
-    $x0 = COPY %3(p0)
-
-...
----
 name:            test_ptr_add_vec_p0
 body:             |
   bb.0.entry:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
index 88d214e43c82e5..c30fab32fccbf6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
@@ -38,18 +38,18 @@ body:             |
     ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x1
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
 
     %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
 
@@ -104,18 +104,18 @@ body:             |
     ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x1
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
 
     %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
@@ -162,18 +162,18 @@ body:             |
     ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x1
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
 
     %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
 
@@ -414,35 +414,35 @@ body:             |
 
     ; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
+    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
     ; LITTLE: %ptr:_(p0) = COPY $x0
-    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
     ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
     ; LITTLE: $w1 = COPY %full_load(s32)
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
+    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
     ; BIG: %ptr:_(p0) = COPY $x0
-    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
     ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
     ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
-    %cst_4:_(s32) = G_CONSTANT i32 4
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
+    %cst_4:_(s64) = G_CONSTANT i64 4
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x0
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
-    %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
+    %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)
 
     %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
     %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
@@ -476,35 +476,35 @@ body:             |
 
     ; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
+    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
     ; LITTLE: %ptr:_(p0) = COPY $x0
-    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
     ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
     ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
     ; LITTLE: $w1 = COPY %full_load(s32)
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
+    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
     ; BIG: %ptr:_(p0) = COPY $x0
-    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
     ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
-    %cst_4:_(s32) = G_CONSTANT i32 4
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
+    %cst_4:_(s64) = G_CONSTANT i64 4
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x0
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
-    %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
+    %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)
 
     %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -538,33 +538,33 @@ body:             |
 
     ; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+    ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
     ; LITTLE: %ptr:_(p0) = COPY $x0
-    ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+    ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
     ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
     ; LITTLE: $w1 = COPY %full_load(s32)
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+    ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
     ; BIG: %ptr:_(p0) = COPY $x0
-    ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+    ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
     ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
     ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_neg_1:_(s32) = G_CONSTANT i32 -1
-    %cst_neg_2:_(s32) = G_CONSTANT i32 -2
-    %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+    %cst_neg_1:_(s64) = G_CONSTANT i64 -1
+    %cst_neg_2:_(s64) = G_CONSTANT i64 -2
+    %cst_neg_3:_(s64) = G_CONSTANT i64 -3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x0
-    %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
-    %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
-    %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
+    %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
+    %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
+    %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)
 
     %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
     %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
@@ -598,33 +598,33 @@ body:             |
 
     ; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+    ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
     ; LITTLE: %ptr:_(p0) = COPY $x0
-    ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+    ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
     ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
     ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
     ; LITTLE: $w1 = COPY %full_load(s32)
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+    ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
     ; BIG: %ptr:_(p0) = COPY $x0
-    ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+    ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
     ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_neg_1:_(s32) = G_CONSTANT i32 -1
-    %cst_neg_2:_(s32) = G_CONSTANT i32 -2
-    %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+    %cst_neg_1:_(s64) = G_CONSTANT i64 -1
+    %cst_neg_2:_(s64) = G_CONSTANT i64 -2
+    %cst_neg_3:_(s64) = G_CONSTANT i64 -3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x0
-    %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
-    %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
-    %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
+    %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
+    %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
+    %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)
 
     %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
     %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
@@ -977,15 +977,15 @@ body:             |
 
     ; LITTLE-LABEL: name: dont_combine_duplicate_idx
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
-    ; LITTLE: %reused_idx:_(s32) = G_CONSTANT i32 2
+    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; LITTLE: %reused_idx:_(s64) = G_CONSTANT i64 2
     ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
     ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
     ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
     ; LITTLE: %ptr:_(p0) = COPY $x1
-    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
-    ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
+    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
+    ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
     ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
@@ -1000,15 +1000,15 @@ body:             |
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: dont_combine_duplicate_idx
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
-    ; BIG: %reused_idx:_(s32) = G_CONSTANT i32 2
+    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; BIG: %reused_idx:_(s64) = G_CONSTANT i64 2
     ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
     ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
     ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
     ; BIG: %ptr:_(p0) = COPY $x1
-    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
-    ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
+    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
+    ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
     ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
@@ -1021,17 +1021,17 @@ body:             |
     ; BIG: %full_load:_(s32) = G_OR %or1, %or2
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %reused_idx:_(s32) = G_CONSTANT i32 2
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %reused_idx:_(s64) = G_CONSTANT i64 2
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x1
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
-    %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
+    %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
 
     %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
 
@@ -1064,15 +1064,15 @@ body:             |
 
     ; LITTLE-LABEL: name: dont_combine_duplicate_offset
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
-    ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
-    ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
+    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2
+    ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
     ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
     ; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
     ; LITTLE: %ptr:_(p0) = COPY $x1
-    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
     ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1087,15 +1087,15 @@ body:             |
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: dont_combine_duplicate_offset
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
-    ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
-    ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
+    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2
+    ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
     ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
     ; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
     ; BIG: %ptr:_(p0) = COPY $x1
-    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
     ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1108,17 +1108,17 @@ body:             |
     ; BIG: %full_load:_(s32) = G_OR %or1, %or2
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
 
     %ptr:_(p0) = COPY $x1
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
 
     %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
 
@@ -1153,16 +1153,16 @@ body:             |
 
     ; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset
     ; LITTLE: liveins: $x0, $x1
-    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
-    ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
-    ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
+    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2
+    ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
     ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
     ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
     ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
     ; LITTLE: %ptr:_(p0) = COPY $x1
-    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
     ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1177,16 +1177,16 @@ body:             |
     ; LITTLE: RET_ReallyLR implicit $w1
     ; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset
     ; BIG: liveins: $x0, $x1
-    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
-    ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
-    ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
+    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2
+    ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
     ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
     ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
     ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
     ; BIG: %ptr:_(p0) = COPY $x1
-    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
     ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
     ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
     ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1199,18 +1199,18 @@ body:             |
     ; BIG: %full_load:_(s32) = G_OR %or1, %or2
     ; BIG: $w1 = COPY %full_load(s32)
     ; BIG: RET_ReallyLR implicit $w1
-    %cst_1:_(s32) = G_CONSTANT i32 1
-    %cst_2:_(s32) = G_CONSTANT i32 2
-    %cst_3:_(s32) = G_CONSTANT i32 3
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %cst_2:_(s64) = G_CONSTANT i64 2
+    %cst_3:_(s64) = G_CONSTANT i64 3
 
     %cst_8:_(s32) = G_CONSTANT i32 8
     %cst_16:_(s32) = G_CONSTANT i32 16
     %cst_24:_(s32) = G_CONSTANT i32 24
 
     %ptr:_(p0) = COPY $x1
-    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
-    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
-    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
 
     ; This load is index 0
     %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
index 660746c84287d2..09e1109c36293c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
@@ -205,210 +205,3 @@ body: |
     %2:_(<2 x p3>) = G_PTR_ADD %0, %1
     $vgpr0_vgpr1 = COPY %2
 ...
-
----
-name: test_gep_global_s16_idx
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2
-
-    ; CHECK-LABEL: name: test_gep_global_s16_idx
-    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
-    %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(s32) = COPY $vgpr2
-    %2:_(s16) = G_TRUNC %1
-    %3:_(p1) = G_PTR_ADD %0, %2
-    $vgpr0_vgpr1 = COPY %3
-...
-
----
-name: test_gep_global_s32_idx
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2
-
-    ; CHECK-LABEL: name: test_gep_global_s32_idx
-    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT]](s64)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
-    %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(s32) = COPY $vgpr2
-    %2:_(p1) = G_PTR_ADD %0, %1
-    $vgpr0_vgpr1 = COPY %2
-...
-
----
-name: test_gep_global_s96_idx
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
-
-    ; CHECK-LABEL: name: test_gep_global_s96_idx
-    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[TRUNC]](s64)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
-    %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    %2:_(p1) = G_PTR_ADD %0, %1
-    $vgpr0_vgpr1 = COPY %2
-...
-
----
-name: test_gep_local_i16_idx
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-
-    ; CHECK-LABEL: name: test_gep_local_i16_idx
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
-    %0:_(p3) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(s16) = G_TRUNC %1
-    %3:_(p3) = G_PTR_ADD %0, %2
-    $vgpr0 = COPY %3
-...
-
----
-name: test_gep_local_i64_idx
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1_vgpr2
-
-    ; CHECK-LABEL: name: test_gep_local_i64_idx
-    ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[TRUNC]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
-    %0:_(p3) = COPY $vgpr0
-    %1:_(s64) = COPY $vgpr1_vgpr2
-    %2:_(p3) = G_PTR_ADD %0, %1
-    $vgpr0 = COPY %2
-...
-
----
-name: test_gep_v2p1_v2i32
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
-
-    ; CHECK-LABEL: name: test_gep_v2p1_v2i32
-    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[SEXT]](s64)
-    ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[SEXT1]](s64)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
-    %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    %2:_(<2 x p1>) = G_PTR_ADD %0, %1
-    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
-...
-
----
-name: test_gep_v2p1_v2i96
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
-
-    ; CHECK-LABEL: name: test_gep_v2p1_v2i96
-    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY $vgpr7_vgpr8_vgpr9
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[TRUNC]](s64)
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[COPY2]](s96)
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[TRUNC1]](s64)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
-    %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    %2:_(s96) = COPY $vgpr7_vgpr8_vgpr9
-    %3:_(<2 x s96>) = G_BUILD_VECTOR %1, %2
-    %4:_(<2 x p1>) = G_PTR_ADD %0, %3
-    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
-...
-
----
-name: test_gep_v2p3_v2s16
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2
-
-    ; CHECK-LABEL: name: test_gep_v2p3_v2s16
-    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[SEXT_INREG]](s32)
-    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[SEXT_INREG1]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
-    %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
-    %1:_(<2 x s16>) = COPY $vgpr2
-    %2:_(<2 x p3>) = G_PTR_ADD %0, %1
-    $vgpr0_vgpr1 = COPY %2
-...
-
----
-name: test_gep_v2p3_v2s64
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-
-    ; CHECK-LABEL: name: test_gep_v2p3_v2s64
-    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[TRUNC]](s32)
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[TRUNC1]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
-    %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
-    %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    %2:_(<2 x p3>) = G_PTR_ADD %0, %1
-    $vgpr0_vgpr1 = COPY %2
-...
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
index c1b1e2282254c6..044ad60d1ae765 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
@@ -9,7 +9,6 @@
   define void @test_load_store_64_novfp() #1 { ret void }
 
   define void @test_gep_s32() { ret void }
-  define void @test_gep_s16() { ret void }
 
   attributes #0 = { "target-features"="+vfp2" }
   attributes #1 = { "target-features"="-vfp2sp" }
@@ -211,30 +210,3 @@ body:             |
     $r0 = COPY %2(p0)
     BX_RET 14, $noreg, implicit $r0
 ...
----
-name:            test_gep_s16
-# CHECK-LABEL: name: test_gep_s16
-legalized:       false
-# CHECK: legalized: true
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.0:
-    liveins: $r0
-
-    %0(p0) = COPY $r0
-    %1(s16) = G_LOAD %0(p0) :: (load (s16))
-
-    ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
-    ; CHECK: {{%[0-9]+}}:_(p0) = G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s32)
-    ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
-    %2(p0) = G_PTR_ADD %0, %1(s16)
-
-    $r0 = COPY %2(p0)
-    BX_RET 14, $noreg, implicit $r0
-...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
new file mode 100644
index 00000000000000..584a400996e6ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK
+
+--- |
+  define void @test_gep_i32c(ptr %addr) {
+    %arrayidx = getelementptr i32, ptr undef, i32 5
+    ret void
+  }
+  define void @test_gep_i32(ptr %addr, i32 %ofs) {
+    %arrayidx = getelementptr i32, ptr undef, i32 %ofs
+    ret void
+  }
+...
+---
+name:            test_gep_i32c
+legalized:       false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+    ; CHECK-LABEL: name: test_gep_i32c
+    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
+    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+    ; CHECK-NEXT: RET 0
+    %0(p0) = IMPLICIT_DEF
+    %1(s32) = G_CONSTANT i32 20
+    %2(p0) = G_PTR_ADD %0, %1(s32)
+    G_STORE %2, %0 :: (store (p0) into %ir.addr)
+    RET 0
+...
+---
+name:            test_gep_i32
+legalized:       false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+    ; CHECK-LABEL: name: test_gep_i32
+    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
+    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+    ; CHECK-NEXT: RET 0
+    %0(p0) = IMPLICIT_DEF
+    %1(s32) = IMPLICIT_DEF
+    %2(p0) = G_PTR_ADD %0, %1(s32)
+    G_STORE %2, %0 :: (store (p0) into %ir.addr)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
new file mode 100644
index 00000000000000..7826257c21e58b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=X64
+
+--- |
+  define void @test_gep_i64c(ptr %addr) {
+    %arrayidx = getelementptr i32, ptr undef, i64 5
+    ret void
+  }
+  define void @test_gep_i64(ptr %addr, i64 %ofs) {
+    %arrayidx = getelementptr i32, ptr undef, i64 %ofs
+    ret void
+  }
+...
+---
+name:            test_gep_i64c
+legalized:       false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+    ; X64-LABEL: name: test_gep_i64c
+    ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+    ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
+    ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
+    ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+    ; X64-NEXT: RET 0
+    %0(p0) = IMPLICIT_DEF
+    %1(s64) = G_CONSTANT i64 20
+    %2(p0) = G_PTR_ADD %0, %1(s64)
+    G_STORE %2, %0 :: (store (p0) into %ir.addr)
+    RET 0
+...
+---
+name:            test_gep_i64
+legalized:       false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+    ; X64-LABEL: name: test_gep_i64
+    ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+    ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
+    ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
+    ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+    ; X64-NEXT: RET 0
+    %0(p0) = IMPLICIT_DEF
+    %1(s64) = IMPLICIT_DEF
+    %2(p0) = G_PTR_ADD %0, %1(s64)
+    G_STORE %2, %0 :: (store (p0) into %ir.addr)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir
deleted file mode 100644
index b1beb2e98cc8de..00000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir
+++ /dev/null
@@ -1,224 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X64
-# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X86
-
---- |
-  define void @test_gep_i8c(ptr %addr) {
-    %arrayidx = getelementptr i32, ptr undef, i8 5
-    ret void
-  }
-  define void @test_gep_i8(ptr %addr, i8 %ofs) {
-    %arrayidx = getelementptr i32, ptr undef, i8 %ofs
-    ret void
-  }
-
-  define void @test_gep_i16c(ptr %addr) {
-    %arrayidx = getelementptr i32, ptr undef, i16 5
-    ret void
-  }
-  define void @test_gep_i16(ptr %addr, i16 %ofs) {
-    %arrayidx = getelementptr i32, ptr undef, i16 %ofs
-    ret void
-  }
-
-  define void @test_gep_i32c(ptr %addr) {
-    %arrayidx = getelementptr i32, ptr undef, i32 5
-    ret void
-  }
-  define void @test_gep_i32(ptr %addr, i32 %ofs) {
-    %arrayidx = getelementptr i32, ptr undef, i32 %ofs
-    ret void
-  }
-
-  define void @test_gep_i64c(ptr %addr) {
-    %arrayidx = getelementptr i32, ptr undef, i64 5
-    ret void
-  }
-  define void @test_gep_i64(ptr %addr, i64 %ofs) {
-    %arrayidx = getelementptr i32, ptr undef, i64 %ofs
-    ret void
-  }
-...
----
-name:            test_gep_i8c
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; CHECK-LABEL: name: test_gep_i8c
-    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; CHECK-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s8) = G_CONSTANT i8 20
-    %2(p0) = G_PTR_ADD %0, %1(s8)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i8
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; CHECK-LABEL: name: test_gep_i8
-    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s8)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32)
-    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; CHECK-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s8) = IMPLICIT_DEF
-    %2(p0) = G_PTR_ADD %0, %1(s8)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i16c
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; CHECK-LABEL: name: test_gep_i16c
-    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; CHECK-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s16) = G_CONSTANT i16 20
-    %2(p0) = G_PTR_ADD %0, %1(s16)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i16
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; CHECK-LABEL: name: test_gep_i16
-    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s16)
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32)
-    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; CHECK-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s16) = IMPLICIT_DEF
-    %2(p0) = G_PTR_ADD %0, %1(s16)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i32c
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; CHECK-LABEL: name: test_gep_i32c
-    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; CHECK-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s32) = G_CONSTANT i32 20
-    %2(p0) = G_PTR_ADD %0, %1(s32)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i32
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; CHECK-LABEL: name: test_gep_i32
-    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
-    ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; CHECK-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s32) = IMPLICIT_DEF
-    %2(p0) = G_PTR_ADD %0, %1(s32)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i64c
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; X64-LABEL: name: test_gep_i64c
-    ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-    ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
-    ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; X64-NEXT: RET 0
-    ; X86-LABEL: name: test_gep_i64c
-    ; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; X86-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s64) = G_CONSTANT i64 20
-    %2(p0) = G_PTR_ADD %0, %1(s64)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
----
-name:            test_gep_i64
-legalized:       false
-registers:
-  - { id: 0, class: _ }
-  - { id: 1, class: _ }
-  - { id: 2, class: _ }
-body:             |
-  bb.1 (%ir-block.0):
-    ; X64-LABEL: name: test_gep_i64
-    ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
-    ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
-    ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; X64-NEXT: RET 0
-    ; X86-LABEL: name: test_gep_i64
-    ; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
-    ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64)
-    ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[TRUNC]](s32)
-    ; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
-    ; X86-NEXT: RET 0
-    %0(p0) = IMPLICIT_DEF
-    %1(s64) = IMPLICIT_DEF
-    %2(p0) = G_PTR_ADD %0, %1(s64)
-    G_STORE %2, %0 :: (store (p0) into %ir.addr)
-    RET 0
-...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
index c2dcf30359248a..03d4c7dd3281dd 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -1380,23 +1380,18 @@ body:             |
   bb.0 (%ir-block.0):
     ; FAST-LABEL: name: test_gep
     ; FAST: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
-    ; FAST: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
-    ; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; FAST: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
-    ; FAST: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
+    ; FAST: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
+    ; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
     ; FAST: RET 0
+    ;
     ; GREEDY-LABEL: name: test_gep
     ; GREEDY: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
-    ; GREEDY: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
-    ; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; GREEDY: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
-    ; GREEDY: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
+    ; GREEDY: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
+    ; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
     ; GREEDY: RET 0
     %0(p0) = G_IMPLICIT_DEF
-    %1(s32) = G_CONSTANT i32 20
-    %2(p0) = G_PTR_ADD %0, %1(s32)
-    %3(s64) = G_CONSTANT i64 20
-    %4(p0) = G_PTR_ADD %0, %3(s64)
+    %1(s64) = G_CONSTANT i64 20
+    %2(p0) = G_PTR_ADD %0, %1(s64)
     RET 0
 
 ...

>From cad01436590eb1fad220c6fa1753ab6bef6505d3 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 8 Mar 2024 11:54:30 +0000
Subject: [PATCH 3/5] Fix LegalizerHelper::getVectorElementPointer
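
getVectorElementPointer previously scaled the (clamped) element index in
whatever width it happened to have and fed the result straight to G_PTR_ADD;
it now converts the index to the pointer's index width first. A rough sketch
of the added conversion, mirroring the change below (variable names as in
LegalizerHelper):

    const DataLayout &DL = MIRBuilder.getDataLayout();
    unsigned AS = MRI.getType(VecPtr).getAddressSpace();
    LLT IdxTy = MRI.getType(Index).changeElementSize(DL.getIndexSizeInBits(AS));
    if (IdxTy != MRI.getType(Index))
      Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
    // Index is then multiplied by the element size and used as the
    // G_PTR_ADD offset.

In the AMDGPU tests the index is widened before it is scaled, which is why
the separate sign-extension of the scaled offset (the v_ashrrev of the shift
result) disappears from the generated-code checks.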

---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   9 +-
 .../combine-extract-vector-load.mir           |  13 +-
 .../GlobalISel/extractelement-stack-lower.ll  |  21 +-
 .../AMDGPU/GlobalISel/extractelement.i128.ll  | 110 +++++-----
 .../AMDGPU/GlobalISel/extractelement.i16.ll   | 190 ++++++------------
 5 files changed, 132 insertions(+), 211 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1d016e684c48f6..258047e2b56fd2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4004,7 +4004,14 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
 
   Index = clampVectorIndex(MIRBuilder, Index, VecTy);
 
-  LLT IdxTy = MRI.getType(Index);
+  // Convert index to the correct size for the address space.
+  const DataLayout &DL = MIRBuilder.getDataLayout();
+  unsigned AS = MRI.getType(VecPtr).getAddressSpace();
+  unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
+  LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
+  if (IdxTy != MRI.getType(Index))
+    Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
+
   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
                                  MIRBuilder.buildConstant(IdxTy, EltSize));
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
index aa72a9ec06ede5..b49f5160985130 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
@@ -8,8 +8,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_ptradd_crash__offset_smaller
-    ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
     ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
     ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %1:_(p1) = G_CONSTANT i64 0
@@ -27,8 +28,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_ptradd_crash__offset_wider
-    ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
+    ; CHECK: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 3
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[C]](s128)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[TRUNC]], [[C1]](s64)
+    ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[SHL]](s64)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
     ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
     ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %1:_(p1) = G_CONSTANT i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index b58c3b20986363..43f3dcc86f426e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -11,9 +11,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v2, 63, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    global_load_dword v0, v[0:1], off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -28,10 +27,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GFX12-NEXT:    v_and_b32_e32 v2, 63, v2
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
-; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX12-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX12-NEXT:    global_load_b32 v0, v[0:1], off
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
@@ -46,9 +43,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -63,10 +59,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GFX12-NEXT:    v_and_b32_e32 v2, 0x7f, v2
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX12-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX12-NEXT:    global_load_u16 v0, v[0:1], off
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
@@ -81,9 +75,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v2, 31, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -98,10 +91,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GFX12-NEXT:    v_and_b32_e32 v2, 31, v2
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 3, v2
-; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX12-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX12-NEXT:    global_load_b64 v[0:1], v[0:1], off
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
index 057790617204cc..e1ce9ea14a2a95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
@@ -6,37 +6,44 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
 define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
-; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_and_b32 s0, s4, 3
-; GCN-NEXT:    s_lshl_b32 s0, s0, 4
-; GCN-NEXT:    s_ashr_i32 s1, s0, 31
-; GCN-NEXT:    s_add_u32 s0, s2, s0
-; GCN-NEXT:    s_addc_u32 s1, s3, s1
-; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_and_b32 s0, s4, 3
+; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_and_b32 s0, s4, 3
+; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[2:3], s0
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_and_b32 s0, s4, 3
+; GFX7-NEXT:    s_lshl_b32 s0, s0, 4
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[2:3], s0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_and_b32 s0, s4, 3
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX10-NEXT:    s_add_u32 s0, s2, s0
-; GFX10-NEXT:    s_addc_u32 s1, s3, s1
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_and_b32 s0, s4, 3
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX11-NEXT:    s_add_u32 s0, s2, s0
-; GFX11-NEXT:    s_addc_u32 s1, s3, s1
-; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], s0 offset:0x0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    ; return to shader part epilog
   %vector = load <4 x i128>, ptr addrspace(4) %ptr
@@ -48,8 +55,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
 ; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_and_b32 s0, s2, 3
+; GFX9-NEXT:    s_mov_b32 s1, 0
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
@@ -65,8 +72,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
 ; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_and_b32 s0, s2, 3
+; GFX8-NEXT:    s_mov_b32 s1, 0
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
@@ -82,10 +89,10 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
 ; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_and_b32 s0, s2, 3
+; GFX7-NEXT:    s_mov_b32 s1, 0
 ; GFX7-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, s1
 ; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
@@ -97,8 +104,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
 ; GFX10-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_and_b32 s0, s2, 3
+; GFX10-NEXT:    s_mov_b32 s1, 0
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
@@ -114,9 +121,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
 ; GFX11-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_and_b32 s0, s2, 3
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_mov_b32 s1, 0
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 4
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
@@ -140,9 +146,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -152,9 +157,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -164,9 +168,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b64 s[4:5], 0
@@ -179,9 +182,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -192,10 +194,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
 ; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -208,13 +208,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
 ; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v0, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
@@ -227,10 +222,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
 ; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
@@ -242,10 +236,10 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
 ; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GFX7-NEXT:    s_mov_b32 s0, s2
 ; GFX7-NEXT:    s_mov_b32 s1, s3
-; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX7-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
@@ -259,13 +253,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
 ; GFX10-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_mov_b32_e32 v1, s3
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX10-NEXT:    global_load_dwordx4 v[0:3], v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
@@ -276,14 +265,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
 ; GFX11-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
-; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX11-NEXT:    global_load_b128 v[0:3], v0, s[2:3]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
index 6d772df3fa281b..021f609053a0f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
@@ -10,11 +10,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_and_b32 s0, s4, 3
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX9-NEXT:    s_add_u32 s0, s2, s0
-; GFX9-NEXT:    s_addc_u32 s1, s3, s1
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    global_load_ushort v0, v0, s[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -23,9 +20,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_and_b32 s0, s4, 3
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX8-NEXT:    s_add_u32 s0, s2, s0
-; GFX8-NEXT:    s_addc_u32 s1, s3, s1
+; GFX8-NEXT:    s_addc_u32 s1, s3, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
@@ -38,11 +34,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX7-NEXT:    s_mov_b32 s0, s2
 ; GFX7-NEXT:    s_and_b32 s2, s4, 3
 ; GFX7-NEXT:    s_lshl_b32 s4, s2, 1
-; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
+; GFX7-NEXT:    s_mov_b32 s5, 0
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX7-NEXT:    s_mov_b32 s1, s3
-; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, s5
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
@@ -52,12 +48,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_and_b32 s0, s4, 3
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX10-NEXT:    s_add_u32 s0, s2, s0
-; GFX10-NEXT:    s_addc_u32 s1, s3, s1
-; GFX10-NEXT:    global_load_ushort v0, v0, s[0:1]
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -65,13 +58,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_and_b32 s0, s4, 3
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX11-NEXT:    s_add_u32 s0, s2, s0
-; GFX11-NEXT:    s_addc_u32 s1, s3, s1
-; GFX11-NEXT:    global_load_u16 v0, v0, s[0:1]
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -84,8 +74,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_and_b32 s0, s2, 3
+; GFX9-NEXT:    s_mov_b32 s1, 0
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
@@ -98,8 +88,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_and_b32 s0, s2, 3
+; GFX8-NEXT:    s_mov_b32 s1, 0
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
@@ -112,10 +102,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_and_b32 s0, s2, 3
+; GFX7-NEXT:    s_mov_b32 s1, 0
 ; GFX7-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, s1
 ; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
@@ -124,8 +114,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_and_b32 s0, s2, 3
+; GFX10-NEXT:    s_mov_b32 s1, 0
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
@@ -138,9 +128,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_and_b32 s0, s2, 3
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_mov_b32 s1, 0
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
@@ -161,9 +150,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -173,9 +161,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -185,9 +172,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b64 s[4:5], 0
@@ -200,9 +186,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -213,10 +198,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -229,13 +212,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX9-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -245,10 +223,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
@@ -257,10 +234,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX7-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX7-NEXT:    s_mov_b32 s0, s2
 ; GFX7-NEXT:    s_mov_b32 s1, s3
-; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX7-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
@@ -271,13 +248,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_mov_b32_e32 v1, s3
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -285,14 +257,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_and_b32_e32 v0, 3, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -686,11 +653,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_and_b32 s0, s4, 7
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX9-NEXT:    s_add_u32 s0, s2, s0
-; GFX9-NEXT:    s_addc_u32 s1, s3, s1
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    global_load_ushort v0, v0, s[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -699,9 +663,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_and_b32 s0, s4, 7
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX8-NEXT:    s_add_u32 s0, s2, s0
-; GFX8-NEXT:    s_addc_u32 s1, s3, s1
+; GFX8-NEXT:    s_addc_u32 s1, s3, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
@@ -714,11 +677,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX7-NEXT:    s_mov_b32 s0, s2
 ; GFX7-NEXT:    s_and_b32 s2, s4, 7
 ; GFX7-NEXT:    s_lshl_b32 s4, s2, 1
-; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
+; GFX7-NEXT:    s_mov_b32 s5, 0
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX7-NEXT:    s_mov_b32 s1, s3
-; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, s5
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
@@ -728,12 +691,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_and_b32 s0, s4, 7
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX10-NEXT:    s_add_u32 s0, s2, s0
-; GFX10-NEXT:    s_addc_u32 s1, s3, s1
-; GFX10-NEXT:    global_load_ushort v0, v0, s[0:1]
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -741,13 +701,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
 ; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_and_b32 s0, s4, 7
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX11-NEXT:    s_add_u32 s0, s2, s0
-; GFX11-NEXT:    s_addc_u32 s1, s3, s1
-; GFX11-NEXT:    global_load_u16 v0, v0, s[0:1]
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -760,8 +717,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_and_b32 s0, s2, 7
+; GFX9-NEXT:    s_mov_b32 s1, 0
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
@@ -774,8 +731,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_and_b32 s0, s2, 7
+; GFX8-NEXT:    s_mov_b32 s1, 0
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
@@ -788,10 +745,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_and_b32 s0, s2, 7
+; GFX7-NEXT:    s_mov_b32 s1, 0
 ; GFX7-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, s1
 ; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
@@ -800,8 +757,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_and_b32 s0, s2, 7
+; GFX10-NEXT:    s_mov_b32 s1, 0
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
@@ -814,9 +771,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
 ; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_and_b32 s0, s2, 7
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_mov_b32 s1, 0
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
@@ -837,9 +793,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -849,9 +804,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -861,9 +815,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_and_b32_e32 v2, 7, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b64 s[4:5], 0
@@ -876,9 +829,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -889,10 +841,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
 ; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
 ; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -905,13 +855,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX9-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_and_b32_e32 v0, 7, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -921,10 +866,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX8-NEXT:    v_and_b32_e32 v0, 7, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
@@ -933,10 +877,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX7-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    v_and_b32_e32 v0, 7, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX7-NEXT:    s_mov_b32 s0, s2
 ; GFX7-NEXT:    s_mov_b32 s1, s3
-; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX7-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
@@ -947,13 +891,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_and_b32_e32 v0, 7, v0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_mov_b32_e32 v1, s3
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -961,14 +900,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
 ; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_and_b32_e32 v0, 7, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog

>From d6025ab4ce85c834d7a461e45d51b0132b28bd30 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 8 Mar 2024 12:08:10 +0000
Subject: [PATCH 4/5] clang-format

---
 llvm/lib/CodeGen/MachineVerifier.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index c29daf7e68a643..cdb6cc63feffc1 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1306,7 +1306,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
       unsigned AS = PtrTy.getAddressSpace();
       unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
       if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits)
-        report("gep offset operand must match index size for address space", MI);
+        report("gep offset operand must match index size for address space",
+               MI);
     }
 
     // TODO: Is the offset allowed to be a scalar with a vector?

>From cf51a1c4e7c5367ff7af5af13f773a3603e1e930 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at gmail.com>
Date: Fri, 8 Mar 2024 13:52:29 +0000
Subject: [PATCH 5/5] Braces

---
 llvm/lib/CodeGen/MachineVerifier.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index cdb6cc63feffc1..b397aed76aa20a 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1305,9 +1305,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
       const DataLayout &DL = MF->getDataLayout();
       unsigned AS = PtrTy.getAddressSpace();
       unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
-      if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits)
+      if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits) {
         report("gep offset operand must match index size for address space",
                MI);
+      }
     }
 
     // TODO: Is the offset allowed to be a scalar with a vector?
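
(Not part of the patch -- a minimal handwritten-MIR sketch of what the new verifier check rejects, assuming a target whose address space 0 uses a 64-bit index size. The offset register feeding G_PTR_ADD must be exactly index-sized; a narrower offset would need an explicit G_SEXT/G_ZEXT to s64 first.)

    %0:_(p0) = G_IMPLICIT_DEF
    %1:_(s32) = G_CONSTANT i32 4
    ; Bad machine code: gep offset operand must match index size for address space
    %2:_(p0) = G_PTR_ADD %0, %1
    ; Accepted form: widen the offset to the index size before the add
    %3:_(s64) = G_SEXT %1
    %4:_(p0) = G_PTR_ADD %0, %3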
