[llvm] [GISel] Enforce G_PTR_ADD RHS type matching index size for addr space (PR #84352)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 8 04:08:23 PST 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/84352
>From 4c7b4b6f1ac4a59b874c9bfde9ad501df6de0ffb Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 7 Mar 2024 17:56:54 +0000
Subject: [PATCH 1/4] [GISel] Enforce G_PTR_ADD RHS type matching index size
for addr space
---
llvm/lib/CodeGen/MachineVerifier.cpp | 8 ++++++++
llvm/test/MachineVerifier/test_g_ptr_add.mir | 6 +++++-
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 1d0757c5d7f5f5..c29daf7e68a643 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1301,6 +1301,14 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (OffsetTy.isPointerOrPointerVector())
report("gep offset operand must not be a pointer", MI);
+ if (PtrTy.isPointerOrPointerVector()) {
+ const DataLayout &DL = MF->getDataLayout();
+ unsigned AS = PtrTy.getAddressSpace();
+ unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
+ if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits)
+ report("gep offset operand must match index size for address space", MI);
+ }
+
// TODO: Is the offset allowed to be a scalar with a vector?
break;
}
diff --git a/llvm/test/MachineVerifier/test_g_ptr_add.mir b/llvm/test/MachineVerifier/test_g_ptr_add.mir
index 07fe6266701d5c..7d1373586c8eb3 100644
--- a/llvm/test/MachineVerifier/test_g_ptr_add.mir
+++ b/llvm/test/MachineVerifier/test_g_ptr_add.mir
@@ -1,4 +1,4 @@
-#RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
# REQUIRES: aarch64-registered-target
---
@@ -29,4 +29,8 @@ body: |
; CHECK: Bad machine code: gep first operand must be a pointer
%6:_(s64) = G_PTR_ADD %1, %1
+ %7:_(s32) = G_IMPLICIT_DEF
+
+ ; CHECK: Bad machine code: gep offset operand must match index size for address space
+ %8:_(p0) = G_PTR_ADD %0, %7
...
>From 66a04e5b503d6fe85bb97392a6feb95a19679b5a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 8 Mar 2024 09:39:19 +0000
Subject: [PATCH 2/4] Fix handwritten MIR tests
---
.../GlobalISel/combine-ptradd-int2ptr.mir | 10 +-
.../AArch64/GlobalISel/legalize-ptr-add.mir | 17 --
.../prelegalizer-combiner-load-or-pattern.mir | 226 +++++++++---------
.../AMDGPU/GlobalISel/legalize-ptr-add.mir | 207 ----------------
.../GlobalISel/arm-legalize-load-store.mir | 28 ---
.../X86/GlobalISel/legalize-ptr-add-32.mir | 55 +++++
.../X86/GlobalISel/legalize-ptr-add-64.mir | 55 +++++
.../X86/GlobalISel/legalize-ptr-add.mir | 224 -----------------
.../X86/GlobalISel/regbankselect-X86_64.mir | 19 +-
9 files changed, 235 insertions(+), 606 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
create mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
index 40e5e8ebb7731e..1233a0af424533 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
@@ -11,7 +11,7 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[C]](p64)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
- %1:_(s32) = G_CONSTANT i32 42
+ %1:_(s64) = G_CONSTANT i64 42
%2:_(s32) = G_CONSTANT i32 2
%3:_(p64) = G_INTTOPTR %2
%4:_(p64) = G_PTR_ADD %3, %1
@@ -26,7 +26,7 @@ body: |
; CHECK-LABEL: name: agc.test_combine_ptradd_constants_ptrres
; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
; CHECK: $x0 = COPY [[C]](p64)
- %1:_(s32) = G_CONSTANT i32 42
+ %1:_(s64) = G_CONSTANT i64 42
%2:_(s32) = G_CONSTANT i32 2
%3:_(p64) = G_INTTOPTR %2
%4:_(p64) = G_PTR_ADD %3, %1
@@ -39,12 +39,12 @@ body: |
liveins: $x0, $x1
; Ensure non-constant G_PTR_ADDs are not folded.
; CHECK-LABEL: name: agc.test_not_combine_variable_ptradd
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
; CHECK: [[COPY:%[0-9]+]]:_(p64) = COPY $x1
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p64)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
- %1:_(s32) = G_CONSTANT i32 42
+ %1:_(s64) = G_CONSTANT i64 42
%2:_(p64) = COPY $x1
%3:_(p64) = G_PTR_ADD %2, %1
%4:_(s64) = G_PTRTOINT %3
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
index 7bd9725d0fc87d..1ecd36b55380a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
@@ -1,23 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
---
-name: test_ptr_add_small
-body: |
- bb.0.entry:
- ; CHECK-LABEL: name: test_ptr_add_small
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 8
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
- ; CHECK: $x0 = COPY [[PTR_ADD]](p0)
- %0:_(p0) = COPY $x0
- %1:_(s64) = COPY $x1
- %2:_(s8) = G_TRUNC %1(s64)
- %3:_(p0) = G_PTR_ADD %0, %2(s8)
- $x0 = COPY %3(p0)
-
-...
----
name: test_ptr_add_vec_p0
body: |
bb.0.entry:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
index 88d214e43c82e5..c30fab32fccbf6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
@@ -38,18 +38,18 @@ body: |
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x1
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
@@ -104,18 +104,18 @@ body: |
; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x1
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
%elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
%elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
@@ -162,18 +162,18 @@ body: |
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x1
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
@@ -414,35 +414,35 @@ body: |
; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
+ ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
; LITTLE: %ptr:_(p0) = COPY $x0
- ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+ ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
; LITTLE: $w1 = COPY %full_load(s32)
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat
; BIG: liveins: $x0, $x1
- ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
+ ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
; BIG: %ptr:_(p0) = COPY $x0
- ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+ ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
- %cst_4:_(s32) = G_CONSTANT i32 4
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
+ %cst_4:_(s64) = G_CONSTANT i64 4
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x0
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
- %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
+ %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)
%elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
%elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
@@ -476,35 +476,35 @@ body: |
; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
+ ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
; LITTLE: %ptr:_(p0) = COPY $x0
- ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+ ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
; LITTLE: $w1 = COPY %full_load(s32)
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat
; BIG: liveins: $x0, $x1
- ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
+ ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
; BIG: %ptr:_(p0) = COPY $x0
- ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
+ ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
- %cst_4:_(s32) = G_CONSTANT i32 4
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
+ %cst_4:_(s64) = G_CONSTANT i64 4
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x0
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
- %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
+ %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)
%elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
%elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -538,33 +538,33 @@ body: |
; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+ ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
; LITTLE: %ptr:_(p0) = COPY $x0
- ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+ ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
; LITTLE: $w1 = COPY %full_load(s32)
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat
; BIG: liveins: $x0, $x1
- ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+ ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
; BIG: %ptr:_(p0) = COPY $x0
- ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+ ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_neg_1:_(s32) = G_CONSTANT i32 -1
- %cst_neg_2:_(s32) = G_CONSTANT i32 -2
- %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+ %cst_neg_1:_(s64) = G_CONSTANT i64 -1
+ %cst_neg_2:_(s64) = G_CONSTANT i64 -2
+ %cst_neg_3:_(s64) = G_CONSTANT i64 -3
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x0
- %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
- %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
- %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
+ %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
+ %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
+ %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)
%elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
%elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
@@ -598,33 +598,33 @@ body: |
; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+ ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
; LITTLE: %ptr:_(p0) = COPY $x0
- ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+ ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
; LITTLE: $w1 = COPY %full_load(s32)
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat
; BIG: liveins: $x0, $x1
- ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+ ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
; BIG: %ptr:_(p0) = COPY $x0
- ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
+ ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_neg_1:_(s32) = G_CONSTANT i32 -1
- %cst_neg_2:_(s32) = G_CONSTANT i32 -2
- %cst_neg_3:_(s32) = G_CONSTANT i32 -3
+ %cst_neg_1:_(s64) = G_CONSTANT i64 -1
+ %cst_neg_2:_(s64) = G_CONSTANT i64 -2
+ %cst_neg_3:_(s64) = G_CONSTANT i64 -3
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x0
- %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
- %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
- %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
+ %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
+ %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
+ %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)
%elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
%elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
@@ -977,15 +977,15 @@ body: |
; LITTLE-LABEL: name: dont_combine_duplicate_idx
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
- ; LITTLE: %reused_idx:_(s32) = G_CONSTANT i32 2
+ ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
+ ; LITTLE: %reused_idx:_(s64) = G_CONSTANT i64 2
; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
; LITTLE: %ptr:_(p0) = COPY $x1
- ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
- ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
+ ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
+ ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
@@ -1000,15 +1000,15 @@ body: |
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: dont_combine_duplicate_idx
; BIG: liveins: $x0, $x1
- ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
- ; BIG: %reused_idx:_(s32) = G_CONSTANT i32 2
+ ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
+ ; BIG: %reused_idx:_(s64) = G_CONSTANT i64 2
; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
; BIG: %ptr:_(p0) = COPY $x1
- ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
- ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
+ ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
+ ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
@@ -1021,17 +1021,17 @@ body: |
; BIG: %full_load:_(s32) = G_OR %or1, %or2
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %reused_idx:_(s32) = G_CONSTANT i32 2
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %reused_idx:_(s64) = G_CONSTANT i64 2
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x1
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
- %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
+ %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
@@ -1064,15 +1064,15 @@ body: |
; LITTLE-LABEL: name: dont_combine_duplicate_offset
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
- ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
- ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
+ ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
+ ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2
+ ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
; LITTLE: %ptr:_(p0) = COPY $x1
- ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1087,15 +1087,15 @@ body: |
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: dont_combine_duplicate_offset
; BIG: liveins: $x0, $x1
- ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
- ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
- ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
+ ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
+ ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2
+ ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
; BIG: %ptr:_(p0) = COPY $x1
- ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1108,17 +1108,17 @@ body: |
; BIG: %full_load:_(s32) = G_OR %or1, %or2
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
%cst_8:_(s32) = G_CONSTANT i32 8
%duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
%ptr:_(p0) = COPY $x1
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
@@ -1153,16 +1153,16 @@ body: |
; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset
; LITTLE: liveins: $x0, $x1
- ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
- ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
- ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
+ ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
+ ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2
+ ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
; LITTLE: %ptr:_(p0) = COPY $x1
- ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1177,16 +1177,16 @@ body: |
; LITTLE: RET_ReallyLR implicit $w1
; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset
; BIG: liveins: $x0, $x1
- ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
- ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
- ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
+ ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
+ ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2
+ ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
; BIG: %ptr:_(p0) = COPY $x1
- ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
@@ -1199,18 +1199,18 @@ body: |
; BIG: %full_load:_(s32) = G_OR %or1, %or2
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
- %cst_1:_(s32) = G_CONSTANT i32 1
- %cst_2:_(s32) = G_CONSTANT i32 2
- %cst_3:_(s32) = G_CONSTANT i32 3
+ %cst_1:_(s64) = G_CONSTANT i64 1
+ %cst_2:_(s64) = G_CONSTANT i64 2
+ %cst_3:_(s64) = G_CONSTANT i64 3
%cst_8:_(s32) = G_CONSTANT i32 8
%cst_16:_(s32) = G_CONSTANT i32 16
%cst_24:_(s32) = G_CONSTANT i32 24
%ptr:_(p0) = COPY $x1
- %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
- %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
- %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
+ %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
+ %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
+ %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
; This load is index 0
%lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
index 660746c84287d2..09e1109c36293c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
@@ -205,210 +205,3 @@ body: |
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...
-
----
-name: test_gep_global_s16_idx
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1, $vgpr2
-
- ; CHECK-LABEL: name: test_gep_global_s16_idx
- ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
- %0:_(p1) = COPY $vgpr0_vgpr1
- %1:_(s32) = COPY $vgpr2
- %2:_(s16) = G_TRUNC %1
- %3:_(p1) = G_PTR_ADD %0, %2
- $vgpr0_vgpr1 = COPY %3
-...
-
----
-name: test_gep_global_s32_idx
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1, $vgpr2
-
- ; CHECK-LABEL: name: test_gep_global_s32_idx
- ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT]](s64)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
- %0:_(p1) = COPY $vgpr0_vgpr1
- %1:_(s32) = COPY $vgpr2
- %2:_(p1) = G_PTR_ADD %0, %1
- $vgpr0_vgpr1 = COPY %2
-...
-
----
-name: test_gep_global_s96_idx
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
-
- ; CHECK-LABEL: name: test_gep_global_s96_idx
- ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[TRUNC]](s64)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
- %0:_(p1) = COPY $vgpr0_vgpr1
- %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
- %2:_(p1) = G_PTR_ADD %0, %1
- $vgpr0_vgpr1 = COPY %2
-...
-
----
-name: test_gep_local_i16_idx
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; CHECK-LABEL: name: test_gep_local_i16_idx
- ; CHECK: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s32)
- ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
- %0:_(p3) = COPY $vgpr0
- %1:_(s32) = COPY $vgpr1
- %2:_(s16) = G_TRUNC %1
- %3:_(p3) = G_PTR_ADD %0, %2
- $vgpr0 = COPY %3
-...
-
----
-name: test_gep_local_i64_idx
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1_vgpr2
-
- ; CHECK-LABEL: name: test_gep_local_i64_idx
- ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[TRUNC]](s32)
- ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
- %0:_(p3) = COPY $vgpr0
- %1:_(s64) = COPY $vgpr1_vgpr2
- %2:_(p3) = G_PTR_ADD %0, %1
- $vgpr0 = COPY %2
-...
-
----
-name: test_gep_v2p1_v2i32
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
-
- ; CHECK-LABEL: name: test_gep_v2p1_v2i32
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[SEXT]](s64)
- ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[SEXT1]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
- %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
- %2:_(<2 x p1>) = G_PTR_ADD %0, %1
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
-...
-
----
-name: test_gep_v2p1_v2i96
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
-
- ; CHECK-LABEL: name: test_gep_v2p1_v2i96
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY $vgpr7_vgpr8_vgpr9
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[TRUNC]](s64)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[COPY2]](s96)
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[TRUNC1]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
- %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
- %2:_(s96) = COPY $vgpr7_vgpr8_vgpr9
- %3:_(<2 x s96>) = G_BUILD_VECTOR %1, %2
- %4:_(<2 x p1>) = G_PTR_ADD %0, %3
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
-...
-
----
-name: test_gep_v2p3_v2s16
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1, $vgpr2
-
- ; CHECK-LABEL: name: test_gep_v2p3_v2s16
- ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[SEXT_INREG]](s32)
- ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[SEXT_INREG1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
- %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
- %1:_(<2 x s16>) = COPY $vgpr2
- %2:_(<2 x p3>) = G_PTR_ADD %0, %1
- $vgpr0_vgpr1 = COPY %2
-...
-
----
-name: test_gep_v2p3_v2s64
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-
- ; CHECK-LABEL: name: test_gep_v2p3_v2s64
- ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[TRUNC]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[TRUNC1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
- %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
- %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- %2:_(<2 x p3>) = G_PTR_ADD %0, %1
- $vgpr0_vgpr1 = COPY %2
-...
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
index c1b1e2282254c6..044ad60d1ae765 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
@@ -9,7 +9,6 @@
define void @test_load_store_64_novfp() #1 { ret void }
define void @test_gep_s32() { ret void }
- define void @test_gep_s16() { ret void }
attributes #0 = { "target-features"="+vfp2" }
attributes #1 = { "target-features"="-vfp2sp" }
@@ -211,30 +210,3 @@ body: |
$r0 = COPY %2(p0)
BX_RET 14, $noreg, implicit $r0
...
----
-name: test_gep_s16
-# CHECK-LABEL: name: test_gep_s16
-legalized: false
-# CHECK: legalized: true
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.0:
- liveins: $r0
-
- %0(p0) = COPY $r0
- %1(s16) = G_LOAD %0(p0) :: (load (s16))
-
- ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
- ; CHECK: {{%[0-9]+}}:_(p0) = G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s32)
- ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
- %2(p0) = G_PTR_ADD %0, %1(s16)
-
- $r0 = COPY %2(p0)
- BX_RET 14, $noreg, implicit $r0
-...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
new file mode 100644
index 00000000000000..584a400996e6ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK
+
+--- |
+ define void @test_gep_i32c(ptr %addr) {
+ %arrayidx = getelementptr i32, ptr undef, i32 5
+ ret void
+ }
+ define void @test_gep_i32(ptr %addr, i32 %ofs) {
+ %arrayidx = getelementptr i32, ptr undef, i32 %ofs
+ ret void
+ }
+...
+---
+name: test_gep_i32c
+legalized: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ ; CHECK-LABEL: name: test_gep_i32c
+ ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
+ ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+ ; CHECK-NEXT: RET 0
+ %0(p0) = IMPLICIT_DEF
+ %1(s32) = G_CONSTANT i32 20
+ %2(p0) = G_PTR_ADD %0, %1(s32)
+ G_STORE %2, %0 :: (store (p0) into %ir.addr)
+ RET 0
+...
+---
+name: test_gep_i32
+legalized: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ ; CHECK-LABEL: name: test_gep_i32
+ ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
+ ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+ ; CHECK-NEXT: RET 0
+ %0(p0) = IMPLICIT_DEF
+ %1(s32) = IMPLICIT_DEF
+ %2(p0) = G_PTR_ADD %0, %1(s32)
+ G_STORE %2, %0 :: (store (p0) into %ir.addr)
+ RET 0
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
new file mode 100644
index 00000000000000..7826257c21e58b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=X64
+
+--- |
+ define void @test_gep_i64c(ptr %addr) {
+ %arrayidx = getelementptr i32, ptr undef, i64 5
+ ret void
+ }
+ define void @test_gep_i64(ptr %addr, i64 %ofs) {
+ %arrayidx = getelementptr i32, ptr undef, i64 %ofs
+ ret void
+ }
+...
+---
+name: test_gep_i64c
+legalized: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ ; X64-LABEL: name: test_gep_i64c
+ ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+ ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
+ ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
+ ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+ ; X64-NEXT: RET 0
+ %0(p0) = IMPLICIT_DEF
+ %1(s64) = G_CONSTANT i64 20
+ %2(p0) = G_PTR_ADD %0, %1(s64)
+ G_STORE %2, %0 :: (store (p0) into %ir.addr)
+ RET 0
+...
+---
+name: test_gep_i64
+legalized: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ ; X64-LABEL: name: test_gep_i64
+ ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
+ ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
+ ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
+ ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
+ ; X64-NEXT: RET 0
+ %0(p0) = IMPLICIT_DEF
+ %1(s64) = IMPLICIT_DEF
+ %2(p0) = G_PTR_ADD %0, %1(s64)
+ G_STORE %2, %0 :: (store (p0) into %ir.addr)
+ RET 0
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir
deleted file mode 100644
index b1beb2e98cc8de..00000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir
+++ /dev/null
@@ -1,224 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X64
-# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X86
-
---- |
- define void @test_gep_i8c(ptr %addr) {
- %arrayidx = getelementptr i32, ptr undef, i8 5
- ret void
- }
- define void @test_gep_i8(ptr %addr, i8 %ofs) {
- %arrayidx = getelementptr i32, ptr undef, i8 %ofs
- ret void
- }
-
- define void @test_gep_i16c(ptr %addr) {
- %arrayidx = getelementptr i32, ptr undef, i16 5
- ret void
- }
- define void @test_gep_i16(ptr %addr, i16 %ofs) {
- %arrayidx = getelementptr i32, ptr undef, i16 %ofs
- ret void
- }
-
- define void @test_gep_i32c(ptr %addr) {
- %arrayidx = getelementptr i32, ptr undef, i32 5
- ret void
- }
- define void @test_gep_i32(ptr %addr, i32 %ofs) {
- %arrayidx = getelementptr i32, ptr undef, i32 %ofs
- ret void
- }
-
- define void @test_gep_i64c(ptr %addr) {
- %arrayidx = getelementptr i32, ptr undef, i64 5
- ret void
- }
- define void @test_gep_i64(ptr %addr, i64 %ofs) {
- %arrayidx = getelementptr i32, ptr undef, i64 %ofs
- ret void
- }
-...
----
-name: test_gep_i8c
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; CHECK-LABEL: name: test_gep_i8c
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; CHECK-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s8) = G_CONSTANT i8 20
- %2(p0) = G_PTR_ADD %0, %1(s8)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i8
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; CHECK-LABEL: name: test_gep_i8
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s8)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32)
- ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; CHECK-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s8) = IMPLICIT_DEF
- %2(p0) = G_PTR_ADD %0, %1(s8)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i16c
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; CHECK-LABEL: name: test_gep_i16c
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; CHECK-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s16) = G_CONSTANT i16 20
- %2(p0) = G_PTR_ADD %0, %1(s16)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i16
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; CHECK-LABEL: name: test_gep_i16
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = IMPLICIT_DEF
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s16)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32)
- ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; CHECK-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s16) = IMPLICIT_DEF
- %2(p0) = G_PTR_ADD %0, %1(s16)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i32c
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; CHECK-LABEL: name: test_gep_i32c
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; CHECK-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s32) = G_CONSTANT i32 20
- %2(p0) = G_PTR_ADD %0, %1(s32)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i32
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; CHECK-LABEL: name: test_gep_i32
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
- ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; CHECK-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s32) = IMPLICIT_DEF
- %2(p0) = G_PTR_ADD %0, %1(s32)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i64c
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; X64-LABEL: name: test_gep_i64c
- ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
- ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; X64-NEXT: RET 0
- ; X86-LABEL: name: test_gep_i64c
- ; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; X86-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s64) = G_CONSTANT i64 20
- %2(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
----
-name: test_gep_i64
-legalized: false
-registers:
- - { id: 0, class: _ }
- - { id: 1, class: _ }
- - { id: 2, class: _ }
-body: |
- bb.1 (%ir-block.0):
- ; X64-LABEL: name: test_gep_i64
- ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
- ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
- ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; X64-NEXT: RET 0
- ; X86-LABEL: name: test_gep_i64
- ; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
- ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64)
- ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[TRUNC]](s32)
- ; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
- ; X86-NEXT: RET 0
- %0(p0) = IMPLICIT_DEF
- %1(s64) = IMPLICIT_DEF
- %2(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %2, %0 :: (store (p0) into %ir.addr)
- RET 0
-...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
index c2dcf30359248a..03d4c7dd3281dd 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -1380,23 +1380,18 @@ body: |
bb.0 (%ir-block.0):
; FAST-LABEL: name: test_gep
; FAST: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
- ; FAST: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
- ; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; FAST: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
- ; FAST: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
+ ; FAST: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
+ ; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
; FAST: RET 0
+ ;
; GREEDY-LABEL: name: test_gep
; GREEDY: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
- ; GREEDY: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
- ; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; GREEDY: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
- ; GREEDY: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
+ ; GREEDY: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
+ ; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
; GREEDY: RET 0
%0(p0) = G_IMPLICIT_DEF
- %1(s32) = G_CONSTANT i32 20
- %2(p0) = G_PTR_ADD %0, %1(s32)
- %3(s64) = G_CONSTANT i64 20
- %4(p0) = G_PTR_ADD %0, %3(s64)
+ %1(s64) = G_CONSTANT i64 20
+ %2(p0) = G_PTR_ADD %0, %1(s64)
RET 0
...
>From cad01436590eb1fad220c6fa1753ab6bef6505d3 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 8 Mar 2024 11:54:30 +0000
Subject: [PATCH 3/4] Fix LegalizerHelper::getVectorElementPointer
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 9 +-
.../combine-extract-vector-load.mir | 13 +-
.../GlobalISel/extractelement-stack-lower.ll | 21 +-
.../AMDGPU/GlobalISel/extractelement.i128.ll | 110 +++++-----
.../AMDGPU/GlobalISel/extractelement.i16.ll | 190 ++++++------------
5 files changed, 132 insertions(+), 211 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1d016e684c48f6..258047e2b56fd2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4004,7 +4004,14 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
Index = clampVectorIndex(MIRBuilder, Index, VecTy);
- LLT IdxTy = MRI.getType(Index);
+ // Convert index to the correct size for the address space.
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ unsigned AS = MRI.getType(VecPtr).getAddressSpace();
+ unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
+ LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
+ if (IdxTy != MRI.getType(Index))
+ Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
+
auto Mul = MIRBuilder.buildMul(IdxTy, Index,
MIRBuilder.buildConstant(IdxTy, EltSize));
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
index aa72a9ec06ede5..b49f5160985130 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
@@ -8,8 +8,9 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_ptradd_crash__offset_smaller
- ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%1:_(p1) = G_CONSTANT i64 0
@@ -27,8 +28,12 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_ptradd_crash__offset_wider
- ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
+ ; CHECK: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 3
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[C]](s128)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[TRUNC]], [[C1]](s64)
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[SHL]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%1:_(p1) = G_CONSTANT i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index b58c3b20986363..43f3dcc86f426e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -11,9 +11,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 63, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -28,10 +27,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX12-NEXT: v_and_b32_e32 v2, 63, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 2, v2
-; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
@@ -46,9 +43,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -63,10 +59,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX12-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: global_load_u16 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
@@ -81,9 +75,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 31, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -98,10 +91,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX12-NEXT: v_and_b32_e32 v2, 31, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v2
-; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
index 057790617204cc..e1ce9ea14a2a95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
@@ -6,37 +6,44 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
-; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_and_b32 s0, s4, 3
-; GCN-NEXT: s_lshl_b32 s0, s0, 4
-; GCN-NEXT: s_ashr_i32 s1, s0, 31
-; GCN-NEXT: s_add_u32 s0, s2, s0
-; GCN-NEXT: s_addc_u32 s1, s3, s1
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: ; return to shader part epilog
+; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_and_b32 s0, s4, 3
+; GFX9-NEXT: s_lshl_b32 s0, s0, 4
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_and_b32 s0, s4, 3
+; GFX8-NEXT: s_lshl_b32 s0, s0, 4
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_and_b32 s0, s4, 3
+; GFX7-NEXT: s_lshl_b32 s0, s0, 4
+; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 3
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
-; GFX10-NEXT: s_add_u32 s0, s2, s0
-; GFX10-NEXT: s_addc_u32 s1, s3, s1
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 3
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
-; GFX11-NEXT: s_add_u32 s0, s2, s0
-; GFX11-NEXT: s_addc_u32 s1, s3, s1
-; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%vector = load <4 x i128>, ptr addrspace(4) %ptr
@@ -48,8 +55,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 3
+; GFX9-NEXT: s_mov_b32 s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
-; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
@@ -65,8 +72,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 3
+; GFX8-NEXT: s_mov_b32 s1, 0
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
-; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
@@ -82,10 +89,10 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 3
+; GFX7-NEXT: s_mov_b32 s1, 0
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
-; GFX7-NEXT: s_ashr_i32 s1, s0, 31
-; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s1
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -97,8 +104,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX10-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 3
+; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -114,9 +121,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX11-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 3
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -140,9 +146,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -152,9 +157,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -164,9 +168,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 4, v2
-; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
@@ -179,9 +182,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -192,10 +194,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v2
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -208,13 +208,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
@@ -227,10 +222,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -242,10 +236,10 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
-; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
@@ -259,13 +253,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX10-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v0
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
@@ -276,14 +265,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX11-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v0
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
index 6d772df3fa281b..021f609053a0f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
@@ -10,11 +10,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 3
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
-; GFX9-NEXT: s_ashr_i32 s1, s0, 31
-; GFX9-NEXT: s_add_u32 s0, s2, s0
-; GFX9-NEXT: s_addc_u32 s1, s3, s1
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: global_load_ushort v0, v0, s[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
@@ -23,9 +20,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 3
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
-; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_add_u32 s0, s2, s0
-; GFX8-NEXT: s_addc_u32 s1, s3, s1
+; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
@@ -38,11 +34,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_and_b32 s2, s4, 3
; GFX7-NEXT: s_lshl_b32 s4, s2, 1
-; GFX7-NEXT: s_ashr_i32 s5, s4, 31
+; GFX7-NEXT: s_mov_b32 s5, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s1, s3
-; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -52,12 +48,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 3
-; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
-; GFX10-NEXT: s_add_u32 s0, s2, s0
-; GFX10-NEXT: s_addc_u32 s1, s3, s1
-; GFX10-NEXT: global_load_ushort v0, v0, s[0:1]
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
@@ -65,13 +58,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 3
-; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
-; GFX11-NEXT: s_add_u32 s0, s2, s0
-; GFX11-NEXT: s_addc_u32 s1, s3, s1
-; GFX11-NEXT: global_load_u16 v0, v0, s[0:1]
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
@@ -84,8 +74,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 3
+; GFX9-NEXT: s_mov_b32 s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
-; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
@@ -98,8 +88,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 3
+; GFX8-NEXT: s_mov_b32 s1, 0
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
-; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
@@ -112,10 +102,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 3
+; GFX7-NEXT: s_mov_b32 s1, 0
; GFX7-NEXT: s_lshl_b32 s0, s0, 1
-; GFX7-NEXT: s_ashr_i32 s1, s0, 31
-; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s1
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -124,8 +114,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 3
+; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -138,9 +128,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 3
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -161,9 +150,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -173,9 +161,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -185,9 +172,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
@@ -200,9 +186,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -213,10 +198,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -229,13 +212,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
; GFX9-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: global_load_ushort v0, v[0:1], off
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
@@ -245,10 +223,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -257,10 +234,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
; GFX7-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
-; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
@@ -271,13 +248,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v0
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
@@ -285,14 +257,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v0
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
@@ -686,11 +653,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 7
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
-; GFX9-NEXT: s_ashr_i32 s1, s0, 31
-; GFX9-NEXT: s_add_u32 s0, s2, s0
-; GFX9-NEXT: s_addc_u32 s1, s3, s1
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: global_load_ushort v0, v0, s[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
@@ -699,9 +663,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 7
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
-; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_add_u32 s0, s2, s0
-; GFX8-NEXT: s_addc_u32 s1, s3, s1
+; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
@@ -714,11 +677,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_and_b32 s2, s4, 7
; GFX7-NEXT: s_lshl_b32 s4, s2, 1
-; GFX7-NEXT: s_ashr_i32 s5, s4, 31
+; GFX7-NEXT: s_mov_b32 s5, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s1, s3
-; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -728,12 +691,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 7
-; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
-; GFX10-NEXT: s_add_u32 s0, s2, s0
-; GFX10-NEXT: s_addc_u32 s1, s3, s1
-; GFX10-NEXT: global_load_ushort v0, v0, s[0:1]
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
@@ -741,13 +701,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 7
-; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
-; GFX11-NEXT: s_add_u32 s0, s2, s0
-; GFX11-NEXT: s_addc_u32 s1, s3, s1
-; GFX11-NEXT: global_load_u16 v0, v0, s[0:1]
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
@@ -760,8 +717,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 7
+; GFX9-NEXT: s_mov_b32 s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
-; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
@@ -774,8 +731,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 7
+; GFX8-NEXT: s_mov_b32 s1, 0
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
-; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
@@ -788,10 +745,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 7
+; GFX7-NEXT: s_mov_b32 s1, 0
; GFX7-NEXT: s_lshl_b32 s0, s0, 1
-; GFX7-NEXT: s_ashr_i32 s1, s0, 31
-; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s1
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -800,8 +757,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 7
+; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -814,9 +771,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 7
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -837,9 +793,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -849,9 +804,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -861,9 +815,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 7, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
@@ -876,9 +829,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -889,10 +841,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -905,13 +855,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
; GFX9-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 7, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: global_load_ushort v0, v[0:1], off
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
@@ -921,10 +866,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
; GFX8-NEXT: v_and_b32_e32 v0, 7, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -933,10 +877,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
; GFX7-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 7, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
-; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
@@ -947,13 +891,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v0, 7, v0
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v0
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
@@ -961,14 +900,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v0, 7, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v0
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
>From d6025ab4ce85c834d7a461e45d51b0132b28bd30 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 8 Mar 2024 12:08:10 +0000
Subject: [PATCH 4/4] clang-format
---
llvm/lib/CodeGen/MachineVerifier.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index c29daf7e68a643..cdb6cc63feffc1 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1306,7 +1306,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
unsigned AS = PtrTy.getAddressSpace();
unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits)
- report("gep offset operand must match index size for address space", MI);
+ report("gep offset operand must match index size for address space",
+ MI);
}
// TODO: Is the offset allowed to be a scalar with a vector?
More information about the llvm-commits
mailing list