[clang] [llvm] AMDGPU: Add builtin/intrinsic global_(load|store)_b128 (PR #172090)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 13:58:48 PST 2025


https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/172090

Add clang builtins and associated llvm intrinsics for scoped load/store of 128 bits.

New builtins:
1. `__builtin_amdgcn_global_load_b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/docs/LanguageExtensions.rst#__builtin_amdgcn_global_load_b128-and-__builtin_amdgcn_global_store_b128), [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl))
2. `__builtin_amdgcn_global_store_b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/docs/LanguageExtensions.rst#__builtin_amdgcn_global_load_b128-and-__builtin_amdgcn_global_store_b128), [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl))

And corresponding intrinsics:
1. `llvm.amdgcn.global.load.b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/docs/AMDGPUUsage.rst) - search for intrinsic name, [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll) )
2. `llvm.amdgcn.global.store.b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/docs/AMDGPUUsage.rst) - search for intrinsic name, [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll) )

These will initially be used by [RCCL](https://github.com/ROCm/rccl) to address some low-level performance issues.

>From 9bb8bb30a20f3d5267d80254b935ae45266caf19 Mon Sep 17 00:00:00 2001
From: "Curtis, Matthew" <Matthew.Curtis at amd.com>
Date: Thu, 16 Oct 2025 05:26:52 -0500
Subject: [PATCH] AMDGPU: Add builtin/intrinsic global_(load|store)_b128

---
 clang/docs/LanguageExtensions.rst             |    37 +
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |     3 +
 clang/include/clang/Sema/SemaAMDGPU.h         |     2 +
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   |    20 +
 clang/lib/Sema/SemaAMDGPU.cpp                 |    16 +
 .../builtins-amdgcn-global-load-store.cl      |   113 +
 ...builtins-amdgcn-global-load-store-error.cl |    22 +
 ...s-amdgcn-global-load-store-target-error.cl |    26 +
 llvm/docs/AMDGPUUsage.rst                     |   106 +
 .../CodeGen/GlobalISel/GIMatchTableExecutor.h |     6 +
 .../GlobalISel/GIMatchTableExecutorImpl.h     |     9 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |    25 +
 llvm/lib/IR/Verifier.cpp                      |    33 +-
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |     3 +
 llvm/lib/Target/AMDGPU/FLATInstructions.td    |    15 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |    22 +
 .../AMDGPU/llvm.amdgcn.global.load.b128.ll    | 30869 ++++++++++++++++
 .../AMDGPU/llvm.amdgcn.global.store.b128.ll   |  3888 ++
 .../CodeGen/AMDGPU/unsupported-global-load.ll |    36 +
 .../AMDGPU/unsupported-global-store.ll        |    36 +
 llvm/test/Verifier/amdgpu-intrinsics.ll       |    66 +
 .../GlobalISel/GlobalISelMatchTable.cpp       |    17 +
 .../Common/GlobalISel/GlobalISelMatchTable.h  |    18 +
 23 files changed, 35385 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl
 create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl
 create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll
 create mode 100644 llvm/test/Verifier/amdgpu-intrinsics.ll

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c4b86b203d383..4d4d6ca3fe0bd 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -5243,6 +5243,43 @@ returns the bit at the position of the current lane. It is almost equivalent to
 ``(mask & (1 << lane_id)) != 0``, except that its behavior is only defined if
 the given mask has the same value for all active lanes of the current wave.
 
+
+__builtin_amdgcn_global_load_b128 and __builtin_amdgcn_global_store_b128
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Signature:
+
+.. code-block:: c
+
+    typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u;
+    typedef v4u __attribute__((address_space(1))) *global_ptr_to_v4u;
+
+    v4u __builtin_amdgcn_global_load_b128(
+       v4u __attribute__((address_space(1))) *src,
+       const char                            *scope);
+
+    void __builtin_amdgcn_global_store_b128(
+       v4u __attribute__((address_space(1))) *dst,
+       v4u                                    data,
+       const char                            *scope);
+
+Load or store a vector of 4 unsigned integers from or to global memory with
+cache behavior specified by ``scope``, which must be a string literal.
+
+Valid values for ``scope`` are:
+
+* ``"wavefront"``       
+* ``"workgroup"``       
+* ``"agent"``           
+* ``""`` (empty string) 
+
+These builtins are supported on gfx9, gfx10, gfx11, and gfx12 targets.
+
+They map to the llvm intrinsics ``llvm.amdgcn.global.load.b128`` and
+``llvm.amdgcn.global.store.b128`` documented in `User Guide for AMDGPU Backend
+<https://llvm.org/docs/AMDGPUUsage.html>`_.
+
+
 ARM/AArch64 Language Extensions
 -------------------------------
 
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a867144d83928..4bc5b1c16f2ad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -190,6 +190,9 @@ TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "",
 TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "", "vmem-to-lds-load-insts")
 TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "", "vmem-to-lds-load-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_global_load_b128, "V4UiV4Ui*1cC*", "n", "gfx9-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_store_b128, "vV4Ui*1V4UicC*", "n", "gfx9-insts")
+
 //===----------------------------------------------------------------------===//
 // Ballot builtins.
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index bac812a9d4fcf..556bfb705de67 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -28,6 +28,8 @@ class SemaAMDGPU : public SemaBase {
 
   bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore);
 
+  bool checkScopedMemAccessFunctionCall(CallExpr *TheCall);
+
   bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                unsigned NumDataArgs);
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index eabdc370da6b4..384f76e092252 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -885,6 +885,26 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
     return Builder.CreateCall(F, {Args});
   }
+  case AMDGPU::BI__builtin_amdgcn_global_load_b128:
+  case AMDGPU::BI__builtin_amdgcn_global_store_b128: {
+    const bool IsStore =
+        BuiltinID == AMDGPU::BI__builtin_amdgcn_global_store_b128;
+    LLVMContext &Ctx = CGM.getLLVMContext();
+    SmallVector<Value *, 5> Args = {EmitScalarExpr(E->getArg(0))}; // addr
+    if (IsStore)
+      Args.push_back(EmitScalarExpr(E->getArg(1))); // data
+    const unsigned ScopeIdx = E->getNumArgs() - 1;
+    StringRef ScopeLit =
+        cast<StringLiteral>(E->getArg(ScopeIdx)->IgnoreParenCasts())
+            ->getString();
+    llvm::MDNode *MD =
+        llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeLit)});
+    Args.push_back(llvm::MetadataAsValue::get(Ctx, MD)); // scope
+    llvm::Function *F =
+        CGM.getIntrinsic(IsStore ? Intrinsic::amdgcn_global_store_b128
+                                 : Intrinsic::amdgcn_global_load_b128);
+    return Builder.CreateCall(F, Args);
+  }
   case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
     Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
                                    {llvm::Type::getInt64Ty(getLLVMContext())});
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index cece22092bb14..72c7bf03f93ad 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -255,6 +255,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
            (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
            (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
   }
+  case AMDGPU::BI__builtin_amdgcn_global_load_b128:
+  case AMDGPU::BI__builtin_amdgcn_global_store_b128:
+    return checkScopedMemAccessFunctionCall(TheCall);
   default:
     return false;
   }
@@ -344,6 +347,19 @@ bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
   return Fail;
 }
 
+bool SemaAMDGPU::checkScopedMemAccessFunctionCall(CallExpr *TheCall) {
+  bool Fail = false;
+  // Last argument is a string literal
+  Expr *Arg = TheCall->getArg(TheCall->getNumArgs() - 1);
+  auto Scope = dyn_cast<StringLiteral>(Arg->IgnoreParenCasts());
+  if (!Scope) {
+    Fail = true;
+    Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
+        << Arg->getSourceRange();
+  }
+  return Fail;
+}
+
 bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                          unsigned NumDataArgs) {
   assert(NumDataArgs <= 2);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl
new file mode 100644
index 0000000000000..7ffceead747e8
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl
@@ -0,0 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals smart
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950         -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX950
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX9_4_GENERIC
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250        -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX1250
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx12-generic  -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX12_GENERIC
+
+typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32;
+typedef v4u32 __global *global_ptr_to_v4u32;
+
+//------------------------------------------------------------------------------
+// Store
+//------------------------------------------------------------------------------
+// GFX-LABEL: @test_amdgcn_global_store_b128_00(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META4:![0-9]+]])
+// GFX-NEXT:    ret void
+//
+void test_amdgcn_global_store_b128_00(global_ptr_to_v4u32 ptr, v4u32 data) {
+  __builtin_amdgcn_global_store_b128(ptr, data, "wavefront");
+}
+
+// GFX-LABEL: @test_amdgcn_global_store_b128_01(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META5:![0-9]+]])
+// GFX-NEXT:    ret void
+//
+void test_amdgcn_global_store_b128_01(global_ptr_to_v4u32 ptr, v4u32 data) {
+  __builtin_amdgcn_global_store_b128(ptr, data, "workgroup");
+}
+
+// GFX-LABEL: @test_amdgcn_global_store_b128_10(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META6:![0-9]+]])
+// GFX-NEXT:    ret void
+//
+void test_amdgcn_global_store_b128_10(global_ptr_to_v4u32 ptr, v4u32 data) {
+  __builtin_amdgcn_global_store_b128(ptr, data, "agent");
+}
+
+// GFX-LABEL: @test_amdgcn_global_store_b128_11(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META7:![0-9]+]])
+// GFX-NEXT:    ret void
+//
+void test_amdgcn_global_store_b128_11(global_ptr_to_v4u32 ptr, v4u32 data) {
+  __builtin_amdgcn_global_store_b128(ptr, data, "");
+}
+
+//------------------------------------------------------------------------------
+// Load
+//------------------------------------------------------------------------------
+// GFX-LABEL: @test_amdgcn_global_load_b128_00(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META4]])
+// GFX-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_00(global_ptr_to_v4u32 ptr) {
+  return __builtin_amdgcn_global_load_b128(ptr, "wavefront");
+}
+
+// GFX-LABEL: @test_amdgcn_global_load_b128_01(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META5]])
+// GFX-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_01(global_ptr_to_v4u32 ptr) {
+  return __builtin_amdgcn_global_load_b128(ptr, "workgroup");
+}
+
+// GFX-LABEL: @test_amdgcn_global_load_b128_10(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META6]])
+// GFX-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_10(global_ptr_to_v4u32 ptr) {
+  return __builtin_amdgcn_global_load_b128(ptr, "agent");
+}
+
+// GFX-LABEL: @test_amdgcn_global_load_b128_11(
+// GFX-NEXT:  entry:
+// GFX-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META7]])
+// GFX-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_11(global_ptr_to_v4u32 ptr) {
+  return __builtin_amdgcn_global_load_b128(ptr, "");
+}
+//.
+// GFX950: [[META4]] = !{!"wavefront"}
+// GFX950: [[META5]] = !{!"workgroup"}
+// GFX950: [[META6]] = !{!"agent"}
+// GFX950: [[META7]] = !{!""}
+//.
+// GFX9_4_GENERIC: [[META4]] = !{!"wavefront"}
+// GFX9_4_GENERIC: [[META5]] = !{!"workgroup"}
+// GFX9_4_GENERIC: [[META6]] = !{!"agent"}
+// GFX9_4_GENERIC: [[META7]] = !{!""}
+//.
+// GFX1250: [[META4]] = !{!"wavefront"}
+// GFX1250: [[META5]] = !{!"workgroup"}
+// GFX1250: [[META6]] = !{!"agent"}
+// GFX1250: [[META7]] = !{!""}
+//.
+// GFX12_GENERIC: [[META4]] = !{!"wavefront"}
+// GFX12_GENERIC: [[META5]] = !{!"workgroup"}
+// GFX12_GENERIC: [[META6]] = !{!"agent"}
+// GFX12_GENERIC: [[META7]] = !{!""}
+//.
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// GFX1250: {{.*}}
+// GFX12_GENERIC: {{.*}}
+// GFX950: {{.*}}
+// GFX9_4_GENERIC: {{.*}}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl
new file mode 100644
index 0000000000000..b21b604baa944
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950         -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx9-4-generic -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32;
+typedef v4u32 __global *global_ptr_to_v4u32;
+
+void test_amdgcn_global_store_b128_00(v4u32 *ptr, v4u32 data, const char* scope) {
+  __builtin_amdgcn_global_store_b128(ptr, data, "");  //expected-error{{passing '__private v4u32 *__private' to parameter of type '__attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int __global *' changes address space of pointer}}
+}
+
+void test_amdgcn_global_store_b128_01(global_ptr_to_v4u32 ptr, v4u32 data, const char* scope) {
+  __builtin_amdgcn_global_store_b128(ptr, data, scope);  //expected-error{{expression is not a string literal}}
+}
+
+v4u32 test_amdgcn_global_load_b128_00(v4u32 *ptr, const char* scope) {
+  return __builtin_amdgcn_global_load_b128(ptr, "");  //expected-error{{passing '__private v4u32 *__private' to parameter of type '__attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int __global *' changes address space of pointer}}
+}
+
+v4u32 test_amdgcn_global_load_b128_01(global_ptr_to_v4u32 ptr, const char* scope) {
+  return __builtin_amdgcn_global_load_b128(ptr, scope);  //expected-error{{expression is not a string literal}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl
new file mode 100644
index 0000000000000..ec357c58ef903
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl
@@ -0,0 +1,26 @@
+// Loads and stores are tested separately because clang appears to stop after
+// reporting the first 'needs target feature' error.
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_LOAD  -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_LOAD  -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_LOAD  -S -verify -o - %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_STORE -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_STORE -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_STORE -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32;
+typedef v4u32 __global *global_ptr_to_v4u32;
+
+#ifdef TEST_LOAD
+v4u32 test_amdgcn_global_load_b128_01(global_ptr_to_v4u32 ptr, const char* scope) {
+  return __builtin_amdgcn_global_load_b128(ptr, ""); // expected-error{{'__builtin_amdgcn_global_load_b128' needs target feature gfx9-insts}}
+}
+#endif
+
+#ifdef TEST_STORE
+void test_amdgcn_global_store_b128_01(global_ptr_to_v4u32 ptr, v4u32 data, const char* scope) {
+  __builtin_amdgcn_global_store_b128(ptr, data, ""); // expected-error{{'__builtin_amdgcn_global_store_b128' needs target feature gfx9-insts}}
+}
+#endif
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 7ecf1c1124894..39afd29737156 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1596,6 +1596,112 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
                                                    * 1 - Data cache.
 
                                                    Instruction cache prefetches are unsafe on invalid address.
+
+  llvm.amdgcn.global.load.b128                     This intrinsic is supported on gfx9, gfx10, gfx11, and gfx12 targets.
+  
+                                                   Signature:
+                                                   
+                                                   .. code-block:: llvm
+                                                      
+                                                      <4 x i32> @llvm.amdgcn.global.load.b128(
+                                                          ptr addrspace(1), ; source
+                                                          metadata)         ; scope    - e.g. '!0' where '!0 = !{!"wavefront"}'
+
+                                                   Reads the value from the source address with cache behavior specified by the scope.
+
+                                                   The following table shows the mapping between valid scope values and target
+                                                   instruction flags or field values.
+
+                                                   ============== ========================== ========================== ========================== ========================== ==========================
+                                                   targets        instruction                           ``"wavefront"``            ``"workgroup"``                ``"agent"``      ``""`` (empty string)
+                                                   ============== ========================== ========================== ========================== ========================== ==========================
+                                                   gfx90*         ``global_load_dwordx4``                                                                             ``glc``                    ``glc``
+                                                   
+                                                   gfx942, gfx950 ``global_load_dwordx4``                        (wave)            ``sc0`` (group)           ``sc1`` (device)       ``sc0 sc1`` (system)
+                                                   
+                                                   gfx10*         ``global_load_dwordx4``                                                  ``glc``                ``glc dlc``                ``glc dlc``
+                                                   
+                                                   gfx11*         ``global_load_dwordx4``                                                  ``glc``                    ``glc``                    ``glc``
+                                                   
+                                                   gfx120*        ``global_load_b128``                             (CU)    ``scope:SCOPE_SE`` (SE)  ``scope:SCOPE_DEV`` (DEV)  ``scope:SCOPE_SYS`` (SYS)
+                                                   
+                                                   gfx125*        ``global_load_b128``                             (CU)                             ``scope:SCOPE_DEV`` (DEV)  ``scope:SCOPE_SYS`` (SYS)
+                                                   ============== ========================== ========================== ========================== ========================== ==========================
+                                                   
+                                                   For gfx90*, see "GLC Bit Explained" in the appropriate instruction set reference
+                                                   (e.g. Chapter 9.1.10 in "AMD Instinct MI100" Instruction Set Architecture Reference
+                                                   Guide).
+                                                   
+                                                   For gfx942 and gfx950 targets, see "Memory Scope and Temporal Controls" in the
+                                                   appropriate instruction set reference (e.g. Chapter 9.1.10.2 in the "AMD Instinct
+                                                   MI300" Instruction Set Architecture Reference Guide).
+
+                                                   For gfx10* targets, see "GLC, DLC and SLC Bit Explained" in the appropriate
+                                                   instruction set reference (e.g. Chapter 8.1.10 in "RDNA 2" Instruction Set Architecture
+                                                   Reference Guide).
+                                                   
+                                                   For gfx11* targets, see "Cache Controls: SLC, GLC and DLC" in the appropriate
+                                                   instruction set reference (e.g. Chapter 4.1.1 in "RDNA3" Instruction Set Architecture
+                                                   Reference Guide).
+                                                   
+                                                   For gfx12* targets, see "Cache Controls: SCOPE and Temporal-Hint" in the
+                                                   appropriate instruction set reference (e.g. Chapter 4.1.1 in the "RDNA4"
+                                                   Instruction Set Architecture Reference Guide).
+
+                                                                                                      
+  llvm.amdgcn.global.store.b128                    This intrinsic is supported on gfx9, gfx10, gfx11, and gfx12 targets.
+  
+                                                   Signature:
+                                                   
+                                                   .. code-block:: llvm
+                                                      
+                                                      void @llvm.amdgcn.global.store.b128(
+                                                          ptr addrspace(1), ; destination
+                                                          <4 x i32>,        ; value
+                                                          metadata)         ; scope    - e.g. '!0' where '!0 = !{!"wavefront"}'
+
+                                                   Writes the value to the destination address with cache
+                                                   behavior specified by the scope.
+
+                                                   The following table shows the mapping between valid scope values and target
+                                                   instruction flags or field values.
+
+                                                   ============== ========================== ========================== ========================== ========================== ==========================
+                                                   targets        instruction                           ``"wavefront"``            ``"workgroup"``                ``"agent"``      ``""`` (empty string)
+                                                   ============== ========================== ========================== ========================== ========================== ==========================
+                                                   gfx90*         ``global_store_dwordx4``                                                                            
+                                                   
+                                                   gfx942, gfx950 ``global_store_dwordx4``                       (wave)            ``sc0`` (group)           ``sc1`` (device)       ``sc0 sc1`` (system)
+                                                   
+                                                   gfx10*         ``global_store_dwordx4``
+                                                   
+                                                   gfx11*         ``global_store_dwordx4``
+                                                   
+                                                   gfx120*        ``global_store_b128``                            (CU)    ``scope:SCOPE_SE`` (SE)  ``scope:SCOPE_DEV`` (DEV)  ``scope:SCOPE_SYS`` (SYS)
+                                                   
+                                                   gfx125*        ``global_store_b128``                            (CU)                             ``scope:SCOPE_DEV`` (DEV)  ``scope:SCOPE_SYS`` (SYS)
+                                                   ============== ========================== ========================== ========================== ========================== ==========================
+
+                                                   For gfx90*, see "GLC Bit Explained" in the appropriate instruction set reference
+                                                   (e.g. Chapter 9.1.10 in "AMD Instinct MI100" Instruction Set Architecture Reference
+                                                   Guide).
+                                                   
+                                                   For gfx942 and gfx950 targets, see "Memory Scope and Temporal Controls" in the
+                                                   appropriate instruction set reference (e.g. Chapter 9.1.10.2 in the "AMD Instinct
+                                                   MI300" Instruction Set Architecture Reference Guide).
+
+                                                   For gfx10* targets, see "GLC, DLC and SLC Bit Explained" in the appropriate
+                                                   instruction set reference (e.g. Chapter 8.1.10 in "RDNA 2" Instruction Set
+                                                   Architecture Reference Guide).
+                                                   
+                                                   For gfx11* targets, see "Cache Controls: SLC, GLC and DLC" in the appropriate
+                                                   instruction set reference (e.g. Chapter 4.1.1 in "RDNA3" Instruction Set
+                                                   Architecture Reference Guide).
+                                                   
+                                                   For gfx12* targets, see "Cache Controls: SCOPE and Temporal-Hint" in the
+                                                   appropriate instruction set reference (e.g. Chapter 4.1.1 in the "RDNA4"
+                                                   Instruction Set Architecture Reference Guide).
+                                                                   
   ==============================================   ==========================================================
 
 .. TODO::
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 3a2509345b776..f21923827039c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -246,6 +246,12 @@ enum {
   /// - SizeInBits(ULEB128) - The size of the pointer value in bits.
   GIM_CheckPointerToAny,
 
+  /// Check the MachineOperandType of the specified operand
+  /// - InsnID(ULEB128) - Instruction ID
+  /// - OpIdx(ULEB128) - Operand index
+  /// - MachineOperandType(ULEB128) - Expected type
+  GIM_CheckMachineOperandType,
+
   /// Check the register bank for the specified operand
   /// - InsnID(ULEB128) - Instruction ID
   /// - OpIdx(ULEB128) - Operand index
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index a50a0a04fe2bc..b6b2b14c94f66 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -768,6 +768,15 @@ bool GIMatchTableExecutor::executeMatchTable(
 
       break;
     }
+    case GIM_CheckMachineOperandType: {
+      uint64_t InsnID = readULEB();
+      uint64_t OpIdx = readULEB();
+      uint64_t MOTy = readULEB();
+      MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+      if (MO.getType() != MOTy)
+        return false;
+      break;
+    }
     case GIM_RecordNamedOperand: {
       uint64_t InsnID = readULEB();
       uint64_t OpIdx = readULEB();
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 19d5f24c5d5e0..4780e143bbb7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -906,6 +906,31 @@ def int_amdgcn_bitop3 :
                         [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                         [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
 
+class AMDGPUGlobalStore : Intrinsic <
+  [],                      // No results
+  [global_ptr_ty,          // Base global pointer to store to
+   llvm_v4i32_ty,          // Data to store (4 x i32 = 128 bits)
+   llvm_metadata_ty],      // Scope, e.g. !{!"agent"}; checked by the verifier
+  [ IntrWriteMem, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+    IntrWillReturn, IntrNoCallback, IntrNoFree ],
+  "",
+  [SDNPMemOperand, SDNPMayStore]
+>;
+
+def int_amdgcn_global_store_b128 : AMDGPUGlobalStore;
+
+class AMDGPUGlobalLoad : Intrinsic <
+  [llvm_v4i32_ty],         // Loaded data (4 x i32 = 128 bits)
+  [global_ptr_ty,          // Base global pointer to load from
+   llvm_metadata_ty],      // Scope, e.g. !{!"agent"}; checked by the verifier
+  [ IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, IntrWillReturn,
+    IntrNoCallback, IntrNoFree ],
+  "",
+  [SDNPMemOperand, SDNPMayLoad]
+>;
+
+def int_amdgcn_global_load_b128 : AMDGPUGlobalLoad;
+
 } // TargetPrefix = "amdgcn"
 
 // New-style image intrinsics
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 543c26dfe25e0..91dd61ff8da05 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5755,6 +5755,15 @@ void Verifier::visitInstruction(Instruction &I) {
   InstsInThisBlock.insert(&I);
 }
 
+static MDString *getMetadataValueAsString(MetadataAsValue *MDV) {
+  if (!MDV) // Callers pass a dyn_cast result, which may be null.
+    return nullptr;
+  auto *MD = dyn_cast<MDTuple>(MDV->getMetadata());
+  if (!MD || MD->getNumOperands() != 1) // Require a single-operand tuple.
+    return nullptr;
+  return dyn_cast<MDString>(MD->getOperand(0)); // Null unless it is a string.
+}
+
 /// Allow intrinsics to be verified in different ways.
 void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   Function *IF = Call.getCalledFunction();
@@ -6966,14 +6975,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           &Call, PtrArg);
 
     // Last argument must be a MD string
-    auto *Op = cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
-    MDNode *MD = cast<MDNode>(Op->getMetadata());
-    Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
+    auto *Op =
+        dyn_cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
+    Check(getMetadataValueAsString(Op) != nullptr,
           "cooperative atomic intrinsics require that the last argument is a "
           "metadata string",
           &Call, Op);
     break;
   }
+  case Intrinsic::amdgcn_global_load_b128:
+  case Intrinsic::amdgcn_global_store_b128: {
+    Value *LastArg = Call.getArgOperand(Call.arg_size() - 1); // Scope operand.
+    auto *ScopeArg = dyn_cast<MetadataAsValue>(LastArg);
+    MDString *ScopeStr = getMetadataValueAsString(ScopeArg);
+    Check(ScopeStr != nullptr,
+          "global load/store intrinsics require that the last argument is a "
+          "metadata string",
+          &Call, ScopeArg);
+    StringRef Scope = ScopeStr->getString();
+    const bool IsKnownScope = Scope.empty() || Scope == "agent" ||
+                              Scope == "workgroup" || Scope == "wavefront";
+    Check(IsKnownScope,
+          "'" + Scope +
+              "' is not a valid scope for global load/store intrinsics",
+          &Call, ScopeArg);
+    break;
+  }
   case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
   case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
     Value *V = Call.getArgOperand(0);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 266c708f48737..d9e17e2606eab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5628,6 +5628,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_flat_prefetch:
     case Intrinsic::amdgcn_global_prefetch:
       return getDefaultMappingVOP(MI);
+    case Intrinsic::amdgcn_global_load_b128:
+    case Intrinsic::amdgcn_global_store_b128:
+      return getDefaultMappingAllVGPR(MI);
     default:
       return getInvalidInstructionMapping();
     }
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9e38af91c7ccf..56cbb7284b227 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1826,6 +1826,21 @@ multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
   }
 }
 
+class LoadIntrinWithScopeMetadata<SDPatternOperator intrin> : PatFrag<
+  (ops node:$ptr),          // Only the pointer is exposed to the load patterns.
+  (intrin $ptr, srcvalue)>; // srcvalue sits in the scope metadata operand slot.
+def global_load_b128_intrin_pat : LoadIntrinWithScopeMetadata<int_amdgcn_global_load_b128>;
+
+class StoreIntrinWithScopeMetadata<SDPatternOperator intrin> : PatFrag<
+  (ops node:$data, node:$ptr),     // Fragment operands are ordered (data, ptr),
+  (intrin $ptr, $data, srcvalue)>; // the intrinsic takes (ptr, data, scope).
+def global_store_b128_intrin_pat : StoreIntrinWithScopeMetadata<int_amdgcn_global_store_b128>;
+
+let SubtargetPredicate = HasFlatGlobalInsts in { // b128 intrinsics -> DWORDX4
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, global_load_b128_intrin_pat, v4i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, global_store_b128_intrin_pat, v4i32>;
+} // SubtargetPredicate = HasFlatGlobalInsts
+
 multiclass GlobalFLATStorePats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
   def : FlatStoreSignedPat<!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
     let AddedComplexity = 10;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 101fefcc4574b..8a01f91fb75cf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1649,6 +1649,26 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
     return true;
   }
+  case Intrinsic::amdgcn_global_load_b128:
+  case Intrinsic::amdgcn_global_store_b128: {
+    // Describe the access as a 128-bit load or store through operand 0.
+    const bool StoresData = IntrID == Intrinsic::amdgcn_global_store_b128;
+    LLVMContext &Ctx = CI.getContext();
+    Info.opc = StoresData ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = EVT::getIntegerVT(Ctx, 128);
+    Info.ptrVal = CI.getArgOperand(0);
+    Info.flags |=
+        StoresData ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+    // Report a monotonic atomic ordering so that SIMemoryLegalizer sets the
+    // cache flags the way it does for atomic accesses.
+    Info.order = AtomicOrdering::Monotonic;
+    // The scope is the trailing metadata operand: a node holding one string.
+    auto *ScopeOp = cast<MetadataAsValue>(CI.getArgOperand(CI.arg_size() - 1));
+    auto *ScopeNode = cast<MDNode>(ScopeOp->getMetadata());
+    StringRef ScopeName = cast<MDString>(ScopeNode->getOperand(0))->getString();
+    Info.ssid = Ctx.getOrInsertSyncScopeID(ScopeName);
+    return true;
+  }
   case Intrinsic::amdgcn_load_to_lds:
   case Intrinsic::amdgcn_global_load_lds: {
     Info.opc = ISD::INTRINSIC_VOID;
@@ -1753,6 +1773,8 @@ bool SITargetLowering::getAddrModeArguments(const IntrinsicInst *II,
   case Intrinsic::amdgcn_global_store_async_from_lds_b32:
   case Intrinsic::amdgcn_global_store_async_from_lds_b64:
   case Intrinsic::amdgcn_global_store_async_from_lds_b128:
+  case Intrinsic::amdgcn_global_load_b128:
+  case Intrinsic::amdgcn_global_store_b128:
     Ptr = II->getArgOperand(0);
     break;
   case Intrinsic::amdgcn_load_to_lds:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll
new file mode 100644
index 0000000000000..9fde46b1f8239
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll
@@ -0,0 +1,30869 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic    < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-GENERIC-SDAG    %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx906          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX906-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX908-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX90A-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-4-GENERIC-SDAG  %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX942-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX950-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-1-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1012         < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1012-SDAG         %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-3-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX11-GENERIC-SDAG   %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250         < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1250-SDAG         %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX12-GENERIC-SDAG   %s
+
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic    < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-GENERIC-ISEL    %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx906          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX906-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX908-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX90A-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-4-GENERIC-ISEL  %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX942-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX950-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-1-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1012         < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1012-ISEL         %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-3-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX11-GENERIC-ISEL   %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250         < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1250-ISEL         %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX12-GENERIC-ISEL   %s
+
+
+;;==============================================================================
+;; A few basic test cases
+;;==============================================================================
+define <4 x i32> @global_load_b128_0_00(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_00:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_00:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_00:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_00:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_00:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_00:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_00:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_00:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_00:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_00:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_00:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_00:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_00:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_00:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0)
+  ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_0_01(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_01:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_01:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_01:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_01:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_01:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_01:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_01:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_01:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_01:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_01:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_01:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_01:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_01:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_01:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !1)
+  ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_0_10(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_10:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_10:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_10:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_10:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_10:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_10:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_10:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_10:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_10:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_10:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_10:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_10:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_10:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_10:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+  ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_0_11(ptr addrspace(1) %addr) { ; 128-bit global load, address not marked inreg; the checks below use the v[0:1], off form.
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_11:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_11:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_11:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_11:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_11:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_11:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_11:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_11:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_11:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_11:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_11:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_11:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_11:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_11:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3) ; NOTE(review): !3 is defined outside this excerpt; the checks above expect "sc0 sc1" on the GFX9-4/942/950 runs and scope:SCOPE_SYS on the GFX12/GFX1250 runs, which presumably reflects a system-wide scope - confirm against the metadata definition.
+  ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_saddr_0_00(ptr addrspace(1) inreg %addr) { ; 128-bit global load with an inreg address; the checks below use the SGPR-pair base form with a VGPR offset set to 0.
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0) ; NOTE(review): !0 is defined outside this excerpt; the checks above expect no cache-policy or scope modifier on the load for any run configuration.
+  ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_saddr_0_01(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !1)
+  ret <4 x i32> %data
+}
+
+;; Scoped 128-bit global load through an SGPR (inreg) base pointer, passing
+;; metadata !2 as the scope operand. !2 is defined outside this function;
+;; presumably it names the agent/device scope - TODO confirm at its definition.
+;; The checks below expect the saddr form (SGPR-pair base plus a zeroed VGPR
+;; offset) with glc on gfx9, sc1 on gfx9-4/gfx942/gfx950, glc dlc on gfx10,
+;; glc on gfx11, and SCOPE_DEV on gfx12/gfx1250, for both SDAG and ISEL runs.
+define <4 x i32> @global_load_b128_saddr_0_02(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+  ret <4 x i32> %data
+}
+
+;; Scoped 128-bit global load through an SGPR (inreg) base pointer, passing
+;; metadata !3 as the scope operand. !3 is defined outside this function;
+;; presumably it names the system scope - TODO confirm at its definition.
+;; The checks below expect the saddr form (SGPR-pair base plus a zeroed VGPR
+;; offset) with glc on gfx9, sc0 sc1 on gfx9-4/gfx942/gfx950, glc dlc on gfx10,
+;; glc on gfx11, and SCOPE_SYS on gfx12/gfx1250, for both SDAG and ISEL runs.
+define <4 x i32> @global_load_b128_saddr_0_03(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX90A-SDAG:       ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX90A-ISEL:       ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3)
+  ret <4 x i32> %data
+}
+
+;;==============================================================================
+;; Signed offset addressing modes (derived from global-saddr-load.ll) {
+;;==============================================================================
+;;------------------------------------------------------------------------------
+;; No vgpr offset, constants
+;;------------------------------------------------------------------------------
+
+;; base only
+define <4 x float> @global_load_i8_offset_0(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %sbase, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Base plus 4095: the largest immediate offset GFX9 can fold into the global load.
+define <4 x float> @global_load_i8_offset_4095(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Base plus 4096: one past the largest GFX9 immediate offset, so GFX9 must add it to the base address.
+define <4 x float> @global_load_i8_offset_4096(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Base plus 4097: two past the largest GFX9 immediate offset.
+define <4 x float> @global_load_i8_offset_4097(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Base plus -4096, the most negative immediate offset gfx9 can fold (gfx10 needs an explicit add).
+define <4 x float> @global_load_i8_offset_neg4096(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0) ; !0 (defined elsewhere): no cache/scope modifier expected on any target
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Base plus -4097, one byte below the most negative immediate offset gfx9 can fold.
+define <4 x float> @global_load_i8_offset_neg4097(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1) ; !1 (defined elsewhere): expect sc0 on gfx9-4-generic/gfx942/gfx950, glc on gfx10/gfx11, SCOPE_SE on gfx12-generic
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Base plus -4098, two bytes below the most negative immediate offset gfx9 can fold.
+define <4 x float> @global_load_i8_offset_neg4098(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx10 immediate offset (2047) + 1; gfx10 materializes 0x800 with a 64-bit add, the other targets fold 2048 into the immediate
+define <4 x float> @global_load_i8_offset_2048(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx10 immediate offset (2047) + 2; gfx10 SDAG adds 0x800 and keeps offset:1, gfx10 GlobalISel adds the full 0x801, the other targets fold 2049 into the immediate
+define <4 x float> @global_load_i8_offset_2049(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x801, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x801, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x801, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx10 immediate offset (2047) + 3
+define <4 x float> @global_load_i8_offset_2050(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2050 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2050
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2050 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x802, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x802, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x802, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2050 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2050
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2050 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx10 immediate offset
+define <4 x float> @global_load_i8_offset_neg2048(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx10 immediate offset - 1
+define <4 x float> @global_load_i8_offset_neg2049(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx10 immediate offset - 2
+define <4 x float> @global_load_i8_offset_neg2050(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050 ; byte offset -2050 = -2048 - 2; the gfx10 checks above add part or all of it to the base with v_add_co, every other target folds it into the load's immediate
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0) ; 128-bit load under test; !0 is defined outside this view, and no cache-policy or scope modifier appears in any of the checks above
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded bits as the <4 x float> return type
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x7FFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607 ; 8388607 = 0x7FFFFF; the gfx1250 and gfx12-generic checks above fold it whole into the load's immediate, the gfx9/gfx10/gfx11 checks add part (SelectionDAG) or all (GlobalISel) of it to the base first
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1) ; 128-bit load under test; !1 is defined outside this view, and the checks above show sc0 on gfx9-4/gfx942/gfx950, glc on gfx10/gfx11, and scope:SCOPE_SE on gfx12-generic
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded bits as the <4 x float> return type
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0xFFFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0xFFFFFFFF(ptr addrspace(1) %sbase) { ; b128 load at byte offset 0xFFFFFFFF; every checked target needs a 64-bit VALU add to form the address
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295 ; 4294967295 = 0xFFFFFFFF. The *-SDAG checks split it into an added base plus an immediate offset (4095, 2047 or 8388607); the *-ISEL checks add the whole value and use no immediate offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3) ; !3 is defined outside this excerpt. The checked loads carry glc, sc0 sc1, glc dlc or SCOPE_SYS, so it is presumably the system scope - confirm against the metadata node
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100000000(ptr addrspace(1) %sbase) { ; b128 load at byte offset 0x100000000 (2^32); only the high dword of the 64-bit address changes
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296 ; 4294967296 = 0x100000000. The *-SDAG checks add 1 to the high address dword only; the *-ISEL checks emit a full 64-bit add (low 0, high 1 with carry)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0) ; !0 is defined outside this excerpt; none of the checked loads carries a cache-policy or scope modifier
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100000001(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100000FFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391 ; byte offset 0x100000FFF = 2^32 + 4095; the -SDAG checks above carry the low part in the load's offset field (4095, or 0x800 + 2047 on the GFX10 runs), the -ISEL checks add 0xfff in the 64-bit add instead
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; !2 is defined outside this chunk - presumably the scope operand that yields the glc / sc1 / glc dlc / SCOPE_DEV modifiers checked above (TODO confirm against the metadata definitions)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100001000(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+; Exercises llvm.amdgcn.global.load.b128 at %sbase plus a byte offset of
+; -4294967295 (-0xFFFFFFFF), passing metadata !0 as the second operand (!0 is
+; defined outside this function). Per the expected output below, the SDAG
+; variants fold part of the offset into the load's immediate offset field and
+; add the remainder to the 64-bit address in v[0:1], while the ISEL variants
+; add the whole offset to the address and load with no immediate offset. None
+; of the expected loads carries a glc, dlc, sc0, sc1 or scope modifier.
+define <4 x float> @global_load_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4095
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388607
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388607
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+; Exercises llvm.amdgcn.global.load.b128 at %sbase plus a byte offset of
+; -4294967296 (-0x100000000), passing metadata !1 as the second operand (!1 is
+; defined outside this function). The low 32 bits of the offset are zero, so
+; the SDAG variants only add -1 to the high address dword (v1); the ISEL
+; variants still emit a full add / add-with-carry pair with a zero low addend.
+; With !1 the expected load gains sc0, glc or scope:SCOPE_SE on some targets
+; and no modifier on others, as listed below.
+define <4 x float> @global_load_i8_offset_neg0x100000000(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_u32_e32 v1, -1, v1
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, -1, v1
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_nc_u32_e32 v1, -1, v1
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, -1, v1
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, -1, v1
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_nc_u32_e32 v1, -1, v1
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_nc_u32_e32 v1, -1, v1
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_neg0x100000001(ptr addrspace(1) %sbase) { ; 128-bit global load at a constant byte offset of -0x100000001 from %sbase; NOTE(review): "i8" in the name presumably refers to the i8 GEP below, not the loaded width - confirm
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -2, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -2, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297 ; byte offset -(2^32 + 1); the SDAG checks keep -1 as the instruction offset, the ISEL checks add the whole constant to the base
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; 128-bit load; !2 is the scope operand (its definition is outside this view)
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the four i32 lanes as the <4 x float> return value
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Basic addressing patterns
+;;------------------------------------------------------------------------------
+
+;; Basic pattern, no immediate offset: the address is %sbase plus the zero-extended 32-bit %voffset.
+define <4 x float> @global_load_i8_zext_vgpr(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit offset to 64 bits with a zero upper half
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; byte-granular address: %sbase + zext(%voffset)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3) ; 128-bit load; !3 is the scope operand (its definition is outside this view)
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the four i32 lanes as the <4 x float> return value
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx9
+define <4 x float> @global_load_i8_zext_vgpr_offset_4095(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx9 + 1 (4096): the gfx9 checks add 0x1000 to the address separately instead of using an instruction offset.
+define <4 x float> @global_load_i8_zext_vgpr_offset_4096(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx9
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx9 - 1
+;; The address is %sbase plus the zero-extended %voffset plus a constant -4097
+;; bytes. The checks below show that the constant is not emitted as a single
+;; immediate for the gfx9/gfx10/gfx11 prefixes: the *-SDAG checks add 0xfffff000
+;; to the address and keep offset:-1 on the load, while the *-ISEL checks add
+;; 0xffffefff and use no immediate offset. The gfx12 and gfx1250 checks keep
+;; offset:-4097 on the load itself.
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  ; Zero-extend the 32-bit %voffset to 64 bits and use it as a byte index
+  ; from %sbase.
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  ; Apply the constant byte offset under test (-4097).
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097
+  ; Load <4 x i32> through the b128 intrinsic. !3 is defined outside this
+  ; function. NOTE(review): presumably the system-scope operand, since the
+  ; gfx12 checks above show scope:SCOPE_SYS -- confirm against the metadata
+  ; definitions.
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  ; Reinterpret the loaded <4 x i32> as the <4 x float> return value.
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx10
+define <4 x float> @global_load_i8_zext_vgpr_offset_2047(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx10 + 1 (2048): the gfx10 checks add 0x800 to the address instead of using an immediate offset.
+define <4 x float> @global_load_i8_zext_vgpr_offset_2048(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; base + zero-extended variable byte offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048 ; + constant 2048 bytes: the offset under test
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1) ; NOTE(review): !1 is defined outside this view; presumably the scope operand - confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx10 (-2048): the gfx10 checks still use it as the load's immediate offset.
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx10 minus one (-2049); gfx10 can no longer fold the whole offset into the load.
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx12.
+define <4 x float> @global_load_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Minimum (most negative) immediate offset on gfx12: -8388608 (-0x800000) folds into the load's offset field there; older targets add it to the address instead.
+define <4 x float> @global_load_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+
+;; Maximum positive offset on gfx9, and immediate needs to be moved lower.
+define <4 x float> @global_load_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Pointer addressing done in integers: the address is built with ptrtoint + add + inttoptr (no GEP) and passed to llvm.amdgcn.global.load.b128, which returns <4 x i32> despite the "i8" in the test name.
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %sbase.as.int, %zext.offset
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of the integer addressing expression: %add = add i64 %zext.offset, %sbase.as.int (operands commuted relative to global_load_i8_zext_vgpr_ptrtoint), then inttoptr feeds llvm.amdgcn.global.load.b128.
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression, with immediate offset
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int
+  %add.immoffset = add i64 %add, 128
+  %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of the addressing expression, with the immediate offset
+;; in a non-canonical position: the constant 128 is added to the ptrtoint'd
+;; base first, and the zero-extended %voffset is then the LHS of the final add.
+;; Every check block below expects the 128 to end up in the immediate offset
+;; field of the load instruction. The second operand of the intrinsic call is
+;; metadata !2, which is defined elsewhere in this file.
+;; The "i8" in the function name does not match the 128-bit load performed
+;; here; presumably the name was carried over from an existing test
+;; (TODO confirm).
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  ; Widen the 32-bit offset to the 64-bit integer width used for the address.
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  ; The immediate is applied to the base before the variable offset is added.
+  %add.immoffset = add i64 %sbase.as.int, 128
+  ; The zero-extended offset is deliberately the first (LHS) operand here.
+  %add = add i64 %zext.offset, %add.immoffset
+  ; The address is rebuilt from an integer rather than via getelementptr.
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !2)
+  ; Reinterpret the loaded 128 bits as <4 x float> to match the return type.
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Uniformity edge cases
+;;------------------------------------------------------------------------------
+
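+;; Both 64-bit base and 32-bit offset are meant to be scalar (uniform). Note
+;; that neither argument is marked inreg, so the checks below show them
+;; arriving in VGPRs (v[0:1] and v2).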
+define <4 x float> @global_load_i8_zext_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Exercises address selection for llvm.amdgcn.global.load.b128 when the
+;; address is built as base + zext(i32 offset) + constant.
+;;
+;; The IR zero-extends %soffset to i64, offsets %sbase by it, and then applies
+;; a further constant byte offset of -24 before the load. In every check block
+;; below the 64-bit add is done with vector (v_*) instructions and the constant
+;; shows up in the load's immediate field (offset:-24). With metadata !0 the
+;; expected load lines carry no glc/dlc/sc0/sc1/scope modifier.
+;;
+;; NOTE(review): the original summary read "Both 64-bit base and 32-bit offset
+;; are scalar, with immediate offset." The arguments here are not marked inreg
+;; and the checks address through v[0:1]/v2 with "off", so the "scalar" wording
+;; looks inherited from an SGPR-argument variant of this test - confirm.
+;; NOTE(review): !0 is defined outside this function; the scope it requests is
+;; not visible here - confirm.
+define <4 x float> @global_load_i8_zext_uniform_offset_immoffset(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  ; Constant byte offset that the checks above expect as offset:-24 on the load.
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Both components uniform, zext forced to LHS of addressing expression
+define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext'ed 32-bit offset forced to the LHS of the address add, plus a constant 128 that every check line expects as the instruction's immediate offset. The arguments are not 'inreg', so despite "sgpr" in the name the checks see them in v[0:1] and v2; despite "i8" in the name the access is a 128-bit load.
+define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int ; zext operand deliberately placed on the LHS
+  %add.immoffset = add i64 %add, 128 ; constant that the checks expect as the load's immediate offset
+  %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; 64-bit base plus zero-extended 32-bit offset through an i8 GEP. %soffset is not 'inreg', so despite "sgpr32" in the name the checks see it in v2 and the address is formed with VGPR adds; despite "i8" in the name the access is a 128-bit load.
+define <4 x float> @global_load_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset ; byte-granular GEP: base + zext(offset)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Divergent 64-bit base plus a zero-extended 32-bit offset, with an extra
+;; immediate byte offset of 4095. The expected output shows the immediate
+;; either folded entirely into the load's offset field, split between an
+;; explicit add and the offset field, or added entirely with VALU adds,
+;; depending on the target and instruction selector.
+;; NOTE(review): the function uses the default calling convention and the
+;; expected output takes the offset from v2, so the "sgpr32" in the name does
+;; not correspond to an actual SGPR operand here -- confirm this is intended.
+define <4 x float> @global_load_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  ;; Address = %vbase + zext(%soffset) + 4095 bytes, formed by two i8 GEPs.
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+  ;; 128-bit load through the intrinsic under test. !0 is a metadata node
+  ;; defined outside this function (presumably the scope operand -- confirm);
+  ;; none of the expected loads above carry a cache-policy or scope modifier.
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  ;; Reinterpret the <4 x i32> result as the <4 x float> return value.
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Natural addressing shifts with restricted range
+;;------------------------------------------------------------------------------
+
+;; Natural (element-scaled) addressing: a 32-bit index is loaded from memory,
+;; zero-extended to 64 bits and used to index a float array. The shift cannot
+;; be pushed into 32 bits, so the expected output performs a 64-bit shift by 2
+;; followed by a 64-bit add, or a single fused v_lshl_add_u64.
+;; NOTE(review): "cannot match" presumably refers to a scalar-base addressing
+;; form; with the default calling convention %sbase arrives in v[0:1] in the
+;; expected output below -- confirm what this case is meant to cover.
+define <4 x float> @global_load_f32_natural_addressing(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  ;; Ordinary (non-intrinsic) load of the 32-bit element index.
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  ;; float-typed GEP: the 64-bit index is scaled by 4 bytes.
+  %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  ;; 128-bit load through the intrinsic under test. !1 is a metadata node
+  ;; defined outside this function (presumably the scope operand -- confirm).
+  ;; In the expected output the load carries a target-dependent modifier: sc0
+  ;; on the GFX9-4 generic, GFX942 and GFX950 runs, glc on the GFX10 and GFX11
+  ;; runs, scope:SCOPE_SE on the GFX12 generic runs, and none on the remaining
+  ;; GFX9 runs or on GFX1250.
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !1)
+  ;; Reinterpret the <4 x i32> result as the <4 x float> return value.
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
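+;; Cannot push the shift into 32-bits, with an immediate offset. Note that the
+;; GEPs in this function are byte-granular (i8), so the checks contain no shift;
+;; the constant 128 appears as the load's immediate "offset:128" operand.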
+define <4 x float> @global_load_f32_natural_addressing_immoffset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  ; Test body: a 128-bit global load whose address is
+  ; %sbase + zext(i32 loaded from %voffset.ptr) + 128 bytes.
+  ;
+  ; Fetch the 32-bit variable offset and zero-extend it to 64 bits.
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  ; Both GEPs index i8, so the offsets are in bytes: the variable part first,
+  ; then the constant 128 that the checks above show as "offset:128" on the
+  ; final load instruction.
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128
+  ; NOTE(review): metadata !2 is defined outside this part of the file. The
+  ; checks above show it selecting "glc", "sc1", "glc dlc" or scope:SCOPE_DEV
+  ; depending on the target -- presumably a device-level scope; confirm
+  ; against the !2 definition.
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+  ; The intrinsic yields <4 x i32>; reinterpret the same 128 bits as the
+  ; <4 x float> return type.
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Range is sufficiently restricted to push the shift into 32-bits.
+define <4 x float> @global_load_f32_zext_vgpr_range(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+  %zext.offset = zext i32 %voffset to i64
+  %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Range is sufficiently restricted to push the shift into 32 bits; the constant GEP of 100 floats is folded into the load's immediate offset (offset:400).
+define <4 x float> @global_load_f32_zext_vgpr_range_imm_offset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Range is 1 beyond the limit where we can move the shift into 32 bits, so the shift is kept as a 64-bit operation.
+define <4 x float> @global_load_f32_zext_vgpr_range_too_large(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{}
+  %zext.offset = zext i32 %voffset to i64
+  %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; or-with-constant as add
+;;------------------------------------------------------------------------------
+
+;; Check add-as-or with split 64-bit or.
+;; %idx is zero-extended to i64, OR'ed with the constant 16, and the result is
+;; used directly as the global address (inttoptr); %sbase is unused.
+;; In the expected output the OR is applied to the low 32 bits only, the high
+;; half of the address is materialized as 0, and every load uses `off` with no
+;; immediate offset. Metadata !2 is defined outside this function; the GFX12
+;; and GFX1250 checks show it selecting scope:SCOPE_DEV.
+define <4 x float> @global_load_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1012-SDAG-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[1:2], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_or_b32_e32 v0, 16, v1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1012-ISEL-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 16, v1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[1:2], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.idx = zext i32 %idx to i64
+  %or = or i64 %zext.idx, 16
+  %addr = inttoptr i64 %or to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; %idx is zero-extended to i64, OR'ed with the constant 4160 (printed as
+;; 0x1040 in the expected output), and the result is used directly as the
+;; global address (inttoptr); %sbase is unused.
+;; In the expected output the OR is applied to the low 32 bits only, the high
+;; half of the address is materialized as 0, and every load uses `off` with no
+;; immediate offset. Metadata !3 is defined outside this function; the GFX12
+;; and GFX1250 checks show it selecting scope:SCOPE_SYS.
+define <4 x float> @global_load_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1012-SDAG-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    v_or_b32_e32 v2, 0x1040, v1
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-SDAG-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[1:2], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1012-ISEL-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT:    v_or_b32_e32 v2, 0x1040, v1
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_or_b32_e32 v1, 0x1040, v1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[1:2], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.idx = zext i32 %idx to i64
+  %or = or i64 %zext.idx, 4160
+  %addr = inttoptr i64 %or to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Full 64-bit scalar add.
+;;------------------------------------------------------------------------------
+define <4 x float> @global_addr_64bit_lsr_iv(ptr addrspace(1) %arg) {
+; GFX9-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX9-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX9-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX906-SDAG:       ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX906-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX906-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX908-SDAG:       ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX908-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX908-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX90A-SDAG:       ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX90A-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX90A-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX9-4-GENERIC-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX9-4-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX9-4-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX942-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX942-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX942-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX942-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX942-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX950-SDAG:       ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX950-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX950-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX950-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX950-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX950-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX10-1-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX10-1-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX1012-SDAG:       ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX1012-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX1012-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX10-3-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX10-3-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX11-GENERIC-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX11-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX11-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX11-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX11-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX1250-SDAG:       ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX1250-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX1250-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s0, 0xff
+; GFX1250-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX1250-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX12-GENERIC-SDAG-NEXT:  .LBB60_1: ; %bb3
+; GFX12-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_cmp_eq_u32 s0, 0xff
+; GFX12-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB60_1
+; GFX12-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX9-GENERIC-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX9-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX9-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX906-ISEL:       ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX906-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX906-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX906-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX906-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX906-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX908-ISEL:       ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX908-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX908-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX908-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX908-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX908-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX90A-ISEL:       ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX90A-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX90A-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX90A-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX90A-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX90A-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX9-4-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-4-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX9-4-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX942-ISEL:       ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX942-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX942-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX942-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX950-ISEL:       ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX950-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX950-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX950-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX950-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-1-GENERIC-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX10-1-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX10-1-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX1012-ISEL:       ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX1012-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX1012-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX1012-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1012-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX1012-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-3-GENERIC-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX10-3-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX10-3-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX11-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX11-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX11-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX1250-ISEL:       ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX1250-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX1250-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1250-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX1250-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:  .LBB60_1: ; %bb3
+; GFX12-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX12-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB60_1
+; GFX12-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+bb:                                               ; entry: the b128 load address is %arg plus a zero-extended i32 loop counter scaled by 4
+  br label %bb3
+
+bb2:                                              ; preds = %bb3
+  ret <4 x float> %i6                             ; value produced by the load in the final loop iteration
+
+bb3:                                              ; preds = %bb3, %bb
+  %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]          ; loop counter: 0 on entry, %i8 on the back edge
+  %i4 = zext i32 %i to i64                        ; widen the counter to a 64-bit element index
+  %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4 ; address = %arg + 4 * %i4 (float-sized stride)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !0) ; !0 is defined outside this function; every expected load above is a plain "off" form with no cache-policy modifier
+  %i6 = bitcast <4 x i32> %load to <4 x float>    ; reinterpret the 128 loaded bits as the return type
+  %i8 = add nuw nsw i32 %i, 1                     ; step the counter
+  %i9 = icmp eq i32 %i8, 256                      ; exit once the incremented counter reaches 256
+  br i1 %i9, label %bb2, label %bb3
+}
+
+;; Make sure we only have a single zero vaddr initialization.
+
+define <4 x float> @global_addr_64bit_lsr_iv_multiload(ptr addrspace(1) %arg, ptr addrspace(1) %arg.1, i32 %x) {
+; GFX9-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX9-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX9-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX906-SDAG:       ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX906-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX906-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX908-SDAG:       ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX908-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX908-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX90A-SDAG:       ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX90A-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX90A-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX9-4-GENERIC-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX9-4-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX9-4-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX942-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX942-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX942-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX942-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX942-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX950-SDAG:       ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX950-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX950-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX950-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX950-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX950-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX10-1-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX10-1-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1012-SDAG:       ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX1012-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX1012-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX10-3-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX10-3-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX11-GENERIC-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX11-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_add_i32 s0, s0, 1
+; GFX11-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s0, 0xff
+; GFX11-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX11-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1250-SDAG:       ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX1250-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX1250-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s0, 0xff
+; GFX1250-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX1250-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s0, -1
+; GFX12-GENERIC-SDAG-NEXT:  .LBB61_1: ; %bb5
+; GFX12-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_cmp_eq_u32 s0, 0xff
+; GFX12-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB61_1
+; GFX12-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX9-GENERIC-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX9-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX9-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX906-ISEL:       ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX906-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX906-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX906-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX906-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX906-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX908-ISEL:       ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX908-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX908-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX908-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX908-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX908-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX90A-ISEL:       ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX90A-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX90A-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX90A-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX90A-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX90A-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX9-4-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-4-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX9-4-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX942-ISEL:       ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX942-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX942-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX942-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX950-ISEL:       ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v3, 0xff
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX950-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT:    v_add_u32_e32 v2, 1, v2
+; GFX950-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX950-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX950-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-1-GENERIC-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX10-1-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX10-1-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1012-ISEL:       ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX1012-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX1012-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX1012-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1012-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX1012-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-3-GENERIC-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX10-3-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX10-3-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX11-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX11-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX11-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1250-ISEL:       ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX1250-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX1250-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1250-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX1250-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s0, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:  .LBB61_1: ; %bb5
+; GFX12-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
+; GFX12-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB61_1
+; GFX12-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  br label %bb5
+
+bb2:
+  %y = icmp eq i32 %x, 0
+  br i1 %y, label %bb3, label %bb4
+
+bb3:
+  ret <4 x float> %i6
+
+bb4:
+  ret <4 x float> %i6.1
+
+bb5:
+  %i = phi i32 [ 0, %bb ], [ %i8, %bb5 ]
+  %i4 = zext i32 %i to i64
+  %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !1)
+  %i6 = bitcast <4 x i32> %load to <4 x float>
+  %i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4
+  %load.1 = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !2)
+  %i6.1 = bitcast <4 x i32> %load to <4 x float>
+  %i8 = add nuw nsw i32 %i, 1
+  %i9 = icmp eq i32 %i8, 256
+  br i1 %i9, label %bb2, label %bb5
+}
+;;==============================================================================
+;; } end signed offset addressing modes
+;;==============================================================================
+
+;;==============================================================================
+;; Various saddr addressing modes (derived from global-saddr-load.ll) {
+;;==============================================================================
+
+;;------------------------------------------------------------------------------
+;; No vgpr offset, constants
+;;------------------------------------------------------------------------------
+
+;; SGPR base only
+define <4 x float> @global_load_saddr_i8_offset_0(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %sbase, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx9 immediate offset (4095); the gfx10 checks expect it split as 0x800 in the VGPR offset plus offset:2047
+define <4 x float> @global_load_saddr_i8_offset_4095(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx9 immediate offset + 1 (4096); only the GFX1250 and GFX12-GENERIC checks expect an immediate offset, the rest expect 0x1000 in the VGPR offset
+define <4 x float> @global_load_saddr_i8_offset_4096(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx9 immediate offset + 2
+define <4 x float> @global_load_saddr_i8_offset_4097(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:1 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with the most negative gfx9 immediate offset (-4096)
+define <4 x float> @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff000
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff000
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff000
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with one less than the most negative gfx9 immediate offset (-4097)
+define <4 x float> @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffefff
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffefff
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with two less than the most negative gfx9 immediate offset (-4098)
+define <4 x float> @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-2 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4098
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4098 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xffffeffe
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xffffeffe
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4098
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4098 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with offset 2048, which exceeds the gfx10 immediate offset range (the gfx10 checks move 0x800 into v0 instead)
+define <4 x float> @global_load_saddr_i8_offset_2048(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; !2 is defined outside this chunk; the checks suggest it selects the memory scope (sc1, SCOPE_DEV) -- TODO confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with offset 2049, which exceeds the gfx10 immediate offset range (the gfx10 checks use v0 = 0x800 plus an immediate of 1)
+define <4 x float> @global_load_saddr_i8_offset_2049(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3) ; !3 is defined outside this chunk; the checks suggest it selects the memory scope (sc0 sc1, SCOPE_SYS) -- TODO confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with byte offset 2050, i.e. maximum gfx10 immediate offset (2047) + 3.
+;; The load is called with scope metadata !0 (defined outside this function);
+;; none of the expected load instructions below carries a cache-policy or
+;; scope modifier. The gfx10 targets are expected to split the offset into a
+;; 0x800 VGPR part plus an immediate of 2, while every other target folds 2050
+;; directly into the instruction's immediate offset.
+define <4 x float> @global_load_saddr_i8_offset_2050(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;; Advance the SGPR base by 2050 bytes, load 128 bits through the intrinsic,
+;; and return the <4 x i32> result reinterpreted as <4 x float>.
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx10 immediate offset (byte offset -2048).
+;; The load is called with scope metadata !1 (defined outside this function).
+;; Every target below is expected to fold -2048 directly into the
+;; instruction's immediate offset. Expected modifiers on the load: sc0 on the
+;; gfx9-4-generic, gfx942 and gfx950 targets, glc on the gfx10 and gfx11
+;; targets, scope:SCOPE_SE on gfx12-generic, and none on gfx9-generic, gfx906,
+;; gfx908, gfx90a and gfx1250.
+define <4 x float> @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;; Step the SGPR base back by 2048 bytes, load 128 bits through the intrinsic,
+;; and return the <4 x i32> result reinterpreted as <4 x float>.
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx10 immediate offset - 1
+define <4 x float> @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff7ff
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff7ff
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff7ff
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx10 immediate offset - 2
+;; (-2050 = -2048 - 2). The checks below expect GFX9, GFX9.4, GFX11 and GFX12
+;; to fold -2050 into the load's immediate offset, the GFX10 *-SDAG checks to
+;; add 0xfffff800 (-2048) to the base and keep only offset:-2, and the GFX10
+;; *-ISEL checks to add the full 0xfffff7fe (-2050) to the base.
+;; NOTE(review): !3 is defined outside this excerpt; the scope:SCOPE_SYS /
+;; sc0 sc1 / glc dlc modifiers in the checks suggest it names the system
+;; scope -- confirm against the metadata definition.
+define <4 x float> @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff7fe
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff7fe
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfffff7fe
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; SGPR base with a constant byte offset of 0x7FFFFF (8388607). The checks
+;; below expect GFX12 to encode the whole value as the immediate offset, while
+;; the older targets materialize the high part in v0 and keep only the low
+;; part as the immediate: 0x7ff000 + offset:4095 on GFX9, GFX9.4 and GFX11,
+;; 0x7ff800 + offset:2047 on GFX10.
+;; NOTE(review): !0 is defined outside this excerpt; no cache-policy or scope
+;; modifier appears on any of the expected loads here.
+define <4 x float> @global_load_saddr_i8_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff800
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff800
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff800
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff800
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff800
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff800
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xff800000, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xff800000, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0xff800000, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xff800000, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xff800000
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xff800000
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0xFFFFFFFF(ptr addrspace(1) inreg %sbase) { ; 128-bit llvm.amdgcn.global.load.b128 from inreg base %sbase plus constant byte offset 0xFFFFFFFF
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff800
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff800
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff800
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xff800000, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0xff800000
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff800
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff800
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff800
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0xfffff000
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, -1
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0xff800000
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295 ; byte offset 0xFFFFFFFF (2^32 - 1); the checks split it into a register part plus an immediate (e.g. 0xfffff000 + 4095)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; NOTE(review): !2 is defined outside this chunk; the GFX12 checks show scope:SCOPE_DEV for it - confirm against the metadata node
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the 128 loaded bits as the <4 x float> return value
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100000000(ptr addrspace(1) inreg %sbase) { ; 128-bit llvm.amdgcn.global.load.b128 from inreg base %sbase plus constant byte offset 0x100000000
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_i32 s1, s1, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_i32 s1, s1, 1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_i32 s1, s1, 1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_i32 s17, s17, 1
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_add_i32 s1, s1, 1
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s1, s1, 1
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_add_co_i32 s1, s1, 1
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, 0
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296 ; byte offset 0x100000000 (2^32): low 32 bits are zero, so the checks only add 1 to the high half of the base
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3) ; NOTE(review): !3 is defined outside this chunk; the GFX12 checks show scope:SCOPE_SYS for it - confirm against the metadata node
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the 128 loaded bits as the <4 x float> return value
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100000001(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX950-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, 1
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297 ; 4294967297 = 0x100000001 (2^32 + 1) bytes past the inreg base %sbase; every expected sequence above adds 1 into the high address half with an add/add-with-carry pair
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0) ; !0 is defined outside this function; the expected loads above carry no cache-policy or scope modifier
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded <4 x i32> as <4 x float> to match the return type
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100000FFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, 1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, 1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x800, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0xfff
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0xfff
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391 ; 4294971391 = 0x100000FFF (2^32 + 4095) bytes past the inreg base %sbase; every expected sequence above adds 1 into the high address half with an add/add-with-carry pair
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1) ; !1 is defined outside this function; depending on the target, the expected loads above carry sc0, glc, SCOPE_SE, or no modifier
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded <4 x i32> as <4 x float> to match the return type
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100001000(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, 1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, 1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, 1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x1000, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x1000, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x1000, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0x1000
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0x1000
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s16
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s16
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s16
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, s16
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-SDAG-NEXT:    s_nop 0
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX950-SDAG-NEXT:    s_nop 0
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x800, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x800000, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388607 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x800000, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 1
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 1
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, 1
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, 1
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_neg0x100000000(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_i32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_add_i32 s1, s1, -1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_add_i32 s1, s1, -1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_i32 s17, s17, -1
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_add_i32 s1, s1, -1
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s1, s1, -1
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_add_co_i32 s1, s1, -1
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, 0
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Constant byte offset of -4294967297, i.e. -(0x100000000 + 1), applied to an
+;; inreg base pointer (the expected output shows it arriving in SGPRs).
+;; In the assertions below, every -SDAG prefix adds 0 / -1 (with carry) to the
+;; low / high dwords of the base and folds the remaining -1 into the load's
+;; immediate field (offset:-1), whereas every -ISEL prefix performs the whole
+;; 64-bit add with a scalar add / add-with-carry pair (-1 / -2), copies the
+;; result to VGPRs and issues the load with no immediate offset.
+;; NOTE(review): metadata !1 is defined outside this excerpt; it is presumably
+;; what produces the sc0 / glc / scope:SCOPE_SE modifiers expected on some
+;; targets -- confirm against the metadata definitions in the test file.
+define <4 x float> @global_load_saddr_i8_offset_neg0x100000001(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX950-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, 0, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, 0, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX9-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX906-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX908-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX90A-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_add_u32 s0, s0, -1
+; GFX942-ISEL-NEXT:    s_addc_u32 s1, s1, -2
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_add_u32 s0, s0, -1
+; GFX950-ISEL-NEXT:    s_addc_u32 s1, s1, -2
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX1012-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_add_u32 s4, s16, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    s_addc_u32 s5, s17, -2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_add_u32 s0, s0, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_addc_u32 s1, s1, -2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, -1
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, -2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_u32 s0, s0, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, -2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Basic addressing patterns
+;;------------------------------------------------------------------------------
+
+;; Basic pattern, no immediate offset: the address is an inreg base pointer
+;; (%sbase, shown in SGPRs in the expected output) plus the 32-bit %voffset
+;; zero-extended to 64 bits. Every check prefix below expects the load to take
+;; the VGPR offset and the SGPR base pair directly as its address operands
+;; ("v0, s[N:N+1]"), with no separate address arithmetic emitted.
+;; NOTE(review): metadata !2 is defined outside this excerpt; it is presumably
+;; what produces the glc / sc1 / "glc dlc" / scope:SCOPE_DEV modifiers expected
+;; below -- confirm against the metadata definitions in the test file.
+define <4 x float> @global_load_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx9: %sbase + zext(%voffset) + 4095. The GFX9 checks fold 4095 into the load's offset field; the GFX10 checks expect explicit address adds.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; One past the maximum positive offset on gfx9 (4096): the GFX9/GFX10/GFX11 checks expect explicit 0x1000 address adds, while the GFX12 checks still use offset:4096.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4096(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx9
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4096
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; One byte below the maximum negative immediate offset on gfx9 (-4096 - 1 = -4097); only the gfx12 check lines below still fold the whole constant into the instruction offset.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit %voffset to a 64-bit byte offset
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; %sbase + zext(%voffset)
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097 ; then the constant -4097 bytes under test
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2) ; NOTE(review): !2 is defined outside this chunk; presumably the scope operand (gfx12 checks show SCOPE_DEV) - confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive immediate offset on gfx10 (2047)
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx10 + 1 (2048); the gfx10 checks add 0x800 to the address in VGPRs instead of using an immediate offset, the other targets keep offset:2048.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2048(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit offset argument to 64 bits
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; base pointer + variable byte offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048 ; + the constant byte offset under test
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0) ; NOTE(review): !0 presumably selects the memory scope; no cache/scope modifier shows up in the checks above - confirm against its definition
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx10 (-2048); every target checked below still folds it into the load as offset:-2048.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit offset argument to 64 bits
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; base pointer + variable byte offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048 ; + the constant byte offset under test
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1) ; NOTE(review): !1 presumably selects the memory scope (checks show sc0 / glc / SCOPE_SE on some targets) - confirm against its definition
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
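+;; Maximum negative offset on gfx10 - 1 (-2049); the gfx10 SDAG checks add 0xfffff800 to the address in VGPRs and keep only offset:-1 on the load.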
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx12.
+;; Loads 128 bits through @llvm.amdgcn.global.load.b128 from the address
+;;   %sbase + zext(%voffset) + 8388607      (8388607 == 0x7FFFFF)
+;; The GFX1250 and GFX12-GENERIC checks expect the whole constant to stay in
+;; the instruction's offset field (offset:8388607) together with the
+;; SGPR-base + VGPR-offset addressing form. The checks for the other targets
+;; expect explicit 64-bit add sequences instead: the -SDAG prefixes split the
+;; constant into an added part plus an immediate offset (0x7ff000 + 4095 or
+;; 0x7ff800 + 2047), while the -ISEL prefixes add the full 0x7fffff and use no
+;; immediate offset.
+;; Note: the first body instruction (%zext.offset) sits on the `define` line
+;; below, so it does not appear after the CHECK lines.
+;; NOTE(review): the CHECK lines look autogenerated (presumably by
+;; update_llc_test_checks.py); regenerate rather than hand-edit -- confirm
+;; against the NOTE line at the top of the test file.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Minimum (most negative) offset on gfx12: -8388608.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-SDAG-NEXT:    s_nop 1
+; GFX942-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-SDAG-NEXT:    s_nop 1
+; GFX950-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+
+;; Maximum positive offset on gfx9, and immediate needs to be moved lower.
+;;
+;; The constant 4095 is applied to %sbase first (%gep0) and the zero-extended
+;; VGPR offset second (%gep1), so folding 4095 into the instruction's immediate
+;; field requires looking through the outer GEP.
+;; In the checks below, the GFX9*, GFX11 and GFX12* prefixes select a single
+;; saddr load with offset:4095. The GFX10* prefixes do not: the -SDAG variants
+;; add 0x800 and keep offset:2047, while the -ISEL variants add the full 0xfff
+;; and use no immediate offset.
+;; NOTE(review): the glc/sc0/scope bits presumably come from metadata !1, which
+;; is defined outside this excerpt -- confirm.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  ;; Constant offset first, variable offset second: this is the GEP order under test.
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; pointer addressing done in integers
+;;
+;; The address is formed with ptrtoint + add + inttoptr rather than a GEP. The
+;; checks below show every prefix still selecting the saddr form (SGPR-pair base
+;; plus the 32-bit VGPR offset in v0) with no separate address arithmetic.
+;; NOTE(review): the glc/dlc/sc1/scope:SCOPE_DEV bits presumably come from
+;; metadata !2, which is defined outside this excerpt -- confirm.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  ;; Base plus zero-extended offset, computed on i64 values and cast back to a pointer.
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %sbase.as.int, %zext.offset
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of the add in the ptrtoint/inttoptr address computation; checks expect the saddr form (SGPR-pair base + VGPR offset) on every target.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int ; zext'd offset is the first operand, base the second
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !3) ; NOTE(review): !3 is defined outside this chunk; checks show SCOPE_SYS / sc0 sc1, presumably system scope - confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression, with immediate offset: the trailing +128 is expected to appear as offset:128 on the saddr-form load.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int ; zext'd offset is the first operand, base the second
+  %add.immoffset = add i64 %add, 128 ; constant applied last, after base + offset
+  %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !0) ; NOTE(review): !0 is defined outside this chunk; no scope/cache-control bits appear in the checks for it
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression, with immediate offset in non-canonical position: 128 is added to the base before the zext'd offset, and offset:128 is still expected.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add.immoffset = add i64 %sbase.as.int, 128 ; constant applied to the base first
+  %add = add i64 %zext.offset, %add.immoffset ; zext'd offset is the first operand
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1) ; NOTE(review): !1 is defined outside this chunk; checks show sc0 / glc / SCOPE_SE on some targets and no extra bits on others
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Uniformity edge cases
+;;------------------------------------------------------------------------------
+
+;; Both 64-bit base and 32-bit offset are scalar
+define <4 x float> @global_load_saddr_i8_zext_uniform_offset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Both 64-bit base and 32-bit offset are scalar, with immediate offset.
+define <4 x float> @global_load_saddr_i8_zext_uniform_offset_immoffset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Both components uniform, zext forced to LHS of addressing expression
+define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int
+  %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Both address components uniform (inreg): zext'd 32-bit offset forced to the LHS of the ptrtoint-based add, plus a constant 128 immediate offset
+define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+  %add = add i64 %zext.offset, %sbase.as.int ; zext operand deliberately placed on the LHS of the add
+  %add.immoffset = add i64 %add, 128 ; constant 128 added to the integer address
+  %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1) ; pointer rebuilt via inttoptr, not a getelementptr
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1) ; NOTE(review): !1 is defined outside this excerpt; presumably the scope operand -- confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Divergent 64-bit base (%vbase, not inreg) plus a 32-bit scalar offset (%soffset, inreg) that is zero-extended before the i8 GEP.
+define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64 ; widen the 32-bit scalar offset to 64 bits
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset ; i8 element type, so the index is a byte offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; NOTE(review): !2 is defined outside this excerpt; presumably the scope operand -- confirm
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; divergent 64-bit base, 32-bit scalar offset, with imm offset
+define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s17, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s1, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Natural addressing shifts with restricted range
+;;------------------------------------------------------------------------------
+
+;; The shift cannot be pushed into 32 bits, and the saddr addressing mode cannot be matched.
+define <4 x float> @global_load_saddr_f32_natural_addressing(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Zero-extended 32-bit byte offset (i8 GEP, so no shift is involved) plus a constant 128; every prefix expects the saddr form with offset:128.
+define <4 x float> @global_load_saddr_f32_natural_addressing_immoffset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; The !range on the loaded offset (bounds in !4, not shown here) lets the float-GEP scale be a 32-bit shift (v_lshlrev_b32 by 2) feeding the saddr form.
+define <4 x float> @global_load_f32_saddr_zext_vgpr_range(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+  %zext.offset = zext i32 %voffset to i64
+  %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !2)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; Range is sufficiently restricted to push the shift into 32 bits, with an immediate offset.
+define <4 x float> @global_load_f32_saddr_zext_vgpr_range_imm_offset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:400 glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:400 glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;; The index's !range is 1 beyond the limit for moving the shift into 32-bits, so the address is computed with a 64-bit shift and add and no saddr is used.
+define <4 x float> @global_load_f32_saddr_zext_vgpr_range_too_large(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v2, s17
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s17, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT:    v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{}
+  %zext.offset = zext i32 %voffset to i64
+  %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !0)
+  %cast.load = bitcast <4 x i32> %load to <4 x float>
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; or-with-constant as add
+;;------------------------------------------------------------------------------
+
+;; Check add-as-or with split 64-bit or.
+define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) inreg %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 16, v0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.idx = zext i32 %idx to i64 ; zero-extend the 32-bit index to a 64-bit address value
+  %or = or i64 %zext.idx, 16 ; 64-bit or with 16; every check prefix above folds 16 into v0 only and sets v1 to 0
+  %addr = inttoptr i64 %or to ptr addrspace(1) ; the address is built purely from %idx; %sbase is never used
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !1) ; !1 is defined outside this function - presumably the scope operand, TODO confirm; every checked load uses 'off' with no immediate offset
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded <4 x i32> as <4 x float> for the return value
+  ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) inreg %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-ISEL:       ; %bb.0:
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_or_b32_e32 v0, 0x1040, v0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.idx = zext i32 %idx to i64 ; zero-extend the 32-bit index to a 64-bit address value
+  %or = or i64 %zext.idx, 4160 ; 4160 == 0x1040; every check prefix above ors 0x1040 into v0 only and sets v1 to 0
+  %addr = inttoptr i64 %or to ptr addrspace(1) ; the address is built purely from %idx; %sbase is never used
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2) ; !2 is defined outside this function - presumably the scope operand, TODO confirm; every checked load uses 'off' with no immediate offset
+  %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded <4 x i32> as <4 x float> for the return value
+  ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Full 64-bit scalar add.
+;;------------------------------------------------------------------------------
+define <4 x float> @global_saddr_64bit_lsr_iv(ptr addrspace(1) inreg %arg) {
+; GFX9-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX9-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX9-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX906-SDAG:       ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX906-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX906-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX908-SDAG:       ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX908-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX908-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX90A-SDAG:       ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX90A-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX90A-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX9-4-GENERIC-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX9-4-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX9-4-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX942-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX942-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX942-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX942-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX942-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX942-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX950-SDAG:       ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX950-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX950-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX950-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX950-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX950-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX10-1-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX10-1-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1012-SDAG:       ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX1012-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX1012-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX1012-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX10-3-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX10-3-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX11-GENERIC-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX11-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX11-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX11-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX11-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX11-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1250-SDAG:       ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX1250-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX1250-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s2, 0xff
+; GFX1250-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX1250-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX12-GENERIC-SDAG-NEXT:  .LBB114_1: ; %bb3
+; GFX12-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_cmp_eq_u32 s2, 0xff
+; GFX12-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB114_1
+; GFX12-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX9-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX9-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX906-ISEL:       ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX906-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX906-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX906-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX906-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX908-ISEL:       ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX908-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX908-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX908-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX908-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX90A-ISEL:       ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX90A-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX90A-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX90A-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX90A-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX90A-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX9-4-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX9-4-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX942-ISEL:       ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX942-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX942-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX942-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX950-ISEL:       ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX950-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX950-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX950-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX950-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX10-1-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX10-1-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1012-ISEL:       ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX1012-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX1012-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1012-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX1012-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX10-3-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX10-3-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX11-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX11-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1250-ISEL:       ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX1250-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX1250-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1250-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX1250-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT:  .LBB114_1: ; %bb3
+; GFX12-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX12-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB114_1
+; GFX12-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  br label %bb3
+
+bb2:                                              ; preds = %bb3
+  ret <4 x float> %i6
+
+bb3:                                              ; preds = %bb3, %bb
+  %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
+  %i4 = zext i32 %i to i64
+  %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !3)
+  %i6 = bitcast <4 x i32> %load to <4 x float>
+  %i8 = add nuw nsw i32 %i, 1
+  %i9 = icmp eq i32 %i8, 256
+  br i1 %i9, label %bb2, label %bb3
+}
+
+;; Make sure we only have a single zero vaddr initialization.
+
+define <4 x float> @global_saddr_64bit_lsr_iv_multiload(ptr addrspace(1) inreg %arg, ptr addrspace(1) inreg %arg.1, i32 %x) {
+; GFX9-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX9-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX9-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX9-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX906-SDAG:       ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX906-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX906-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX906-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX908-SDAG:       ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX908-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX908-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX908-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX90A-SDAG:       ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX90A-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX90A-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX90A-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX9-4-GENERIC-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX9-4-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX9-4-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX9-4-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX942-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX942-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX942-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX942-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX942-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX942-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX942-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX942-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX950-SDAG:       ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX950-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX950-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX950-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX950-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX950-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX950-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX950-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX10-1-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX10-1-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX10-1-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1012-SDAG:       ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX1012-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX1012-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX1012-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX1012-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX10-3-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX10-3-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT:    s_add_u32 s4, s16, s4
+; GFX10-3-GENERIC-SDAG-NEXT:    s_addc_u32 s5, s17, s5
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX11-GENERIC-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX11-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_add_i32 s2, s2, 1
+; GFX11-GENERIC-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0xff
+; GFX11-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX11-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT:    s_add_u32 s0, s0, s2
+; GFX11-GENERIC-SDAG-NEXT:    s_addc_u32 s1, s1, s3
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1250-SDAG:       ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX1250-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX1250-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s2, 0xff
+; GFX1250-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX1250-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX1250-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s2, -1
+; GFX12-GENERIC-SDAG-NEXT:  .LBB115_1: ; %bb5
+; GFX12-GENERIC-SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_cmp_eq_u32 s2, 0xff
+; GFX12-GENERIC-SDAG-NEXT:    s_cbranch_scc0 .LBB115_1
+; GFX12-GENERIC-SDAG-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT:    s_mov_b32 s3, 0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_lshl_b64 s[2:3], s[2:3], 2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX9-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX9-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX906-ISEL:       ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX906-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX906-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX906-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX906-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX908-ISEL:       ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX908-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX908-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX908-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX908-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX90A-ISEL:       ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX90A-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX90A-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX90A-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX90A-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX90A-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX9-4-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX9-4-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX942-ISEL:       ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX942-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX942-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX942-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX950-ISEL:       ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0xff
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX950-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX950-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX950-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX950-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX10-1-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX10-1-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1012-ISEL:       ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX1012-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX1012-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1012-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX1012-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX10-3-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX10-3-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX11-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX11-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX11-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1250-ISEL:       ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX1250-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX1250-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1250-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX1250-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s2, -1
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT:  .LBB115_1: ; %bb5
+; GFX12-GENERIC-ISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX12-GENERIC-ISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_cbranch_vccz .LBB115_1
+; GFX12-GENERIC-ISEL-NEXT:  ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  br label %bb5
+
+bb2:
+  %y = icmp eq i32 %x, 0
+  br i1 %y, label %bb3, label %bb4
+
+bb3:
+  ret <4 x float> %i6
+
+bb4:
+  ret <4 x float> %i6.1
+
+bb5:
+  %i = phi i32 [ 0, %bb ], [ %i8, %bb5 ]
+  %i4 = zext i32 %i to i64
+  %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
+  %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !0)
+  %i6 = bitcast <4 x i32> %load to <4 x float>
+  %i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4
+  %load.1 = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !1)
+  %i6.1 = bitcast <4 x i32> %load to <4 x float>
+  %i8 = add nuw nsw i32 %i, 1
+  %i9 = icmp eq i32 %i8, 256
+  br i1 %i9, label %bb2, label %bb5
+}
+;;==============================================================================
+;; } End saddr addressing modes
+;;==============================================================================
+
+!0 = !{!"wavefront"} ; scope string; passed as the metadata operand of a b128 load call above
+!1 = !{!"workgroup"} ; scope string; also passed as the metadata operand of a b128 load call above
+!2 = !{!"agent"} ; presumably another scope string for tests earlier in the file - usage not visible here, confirm
+!3 = !{!""} ; empty scope string - NOTE(review): its meaning is not shown here, confirm against the intrinsic docs
+
+!4 = !{i32 0, i32 1073741824} ; (1 << 30)
+!5 = !{i32 0, i32 1073741825} ; (1 << 30) + 1
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX: {{.*}}
+; GFX-ISEL: {{.*}}
+; GFX-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll
new file mode 100644
index 0000000000000..b71be3f4ca034
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll
@@ -0,0 +1,3888 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic    < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-GENERIC-SDAG    %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx906          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX906-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX908-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX90a-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-4-GENERIC-SDAG  %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX942-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950          < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX950-SDAG          %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-1-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1012         < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1012-SDAG         %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-3-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX11-GENERIC-SDAG   %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250         < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1250-SDAG         %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX12-GENERIC-SDAG   %s
+
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic    < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-GENERIC-ISEL    %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx906          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX906-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX908-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX90a-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-4-GENERIC-ISEL  %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX942-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950          < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX950-ISEL          %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-1-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1012         < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1012-ISEL         %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-3-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX11-GENERIC-ISEL   %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250         < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1250-ISEL         %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic   < %s  | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX12-GENERIC-ISEL   %s
+
+;;==============================================================================
+;; A few basic test cases
+;;==============================================================================
+define void @global_store_b128_0_00(ptr addrspace(1) %addr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_00:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_00:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_00:
+; GFX90a-SDAG:       ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_00:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_00:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_00:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_00:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_00:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_00:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_00:
+; GFX90a-ISEL:       ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_00:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_00:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_00:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_00:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !0) ; scope operand is !0, defined outside this function (presumably "wavefront", as in the sibling load test - TODO confirm); every target above expects one plain 128-bit global store with nothing after "off"
+  ret void
+}
+
+define void @global_store_b128_0_01(ptr addrspace(1) %addr, <4 x i32> %data) { ; 128-bit global store of %data to %addr (no address arithmetic), tagged with metadata !1
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_01:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_01:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_01:
+; GFX90a-SDAG:       ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_01:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_01:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_01:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_01:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_01:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_01:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_01:
+; GFX90a-ISEL:       ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_01:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_01:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_01:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_01:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !1) ; !1 is defined outside this excerpt (presumably the memory scope -- confirm); expected asm above carries sc0 on the gfx9-4-generic/gfx942/gfx950 runs, scope:SCOPE_SE on the gfx12-generic runs, and no modifier on the other runs
+  ret void
+}
+
+define void @global_store_b128_0_10(ptr addrspace(1) %addr, <4 x i32> %data) { ; 128-bit global store of %data to %addr (no address arithmetic), tagged with metadata !2
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_10:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_10:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_10:
+; GFX90a-SDAG:       ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_10:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_10:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_10:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_10:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_10:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_10:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_10:
+; GFX90a-ISEL:       ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_10:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_10:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_10:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_10:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !2) ; !2 is defined outside this excerpt (presumably the memory scope -- confirm); expected asm above carries sc1 on the gfx9-4-generic/gfx942/gfx950 runs, scope:SCOPE_DEV on the gfx1250 and gfx12-generic runs, and no modifier on the other runs
+  ret void
+}
+
+define void @global_store_b128_0_11(ptr addrspace(1) %addr, <4 x i32> %data) { ; 128-bit global store of %data to %addr (no address arithmetic), tagged with metadata !3
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX9-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_11:
+; GFX906-SDAG:       ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_11:
+; GFX908-SDAG:       ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_11:
+; GFX90a-SDAG:       ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_11:
+; GFX942-SDAG:       ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_11:
+; GFX950-SDAG:       ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_11:
+; GFX1012-SDAG:       ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX11-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_11:
+; GFX1250-SDAG:       ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX12-GENERIC-SDAG:       ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX9-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_11:
+; GFX906-ISEL:       ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_11:
+; GFX908-ISEL:       ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_11:
+; GFX90a-ISEL:       ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_11:
+; GFX942-ISEL:       ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_11:
+; GFX950-ISEL:       ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_11:
+; GFX1012-ISEL:       ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX11-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_11:
+; GFX1250-ISEL:       ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX12-GENERIC-ISEL:       ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !3) ; !3 is defined outside this excerpt (presumably the memory scope -- confirm); expected asm above carries sc0 sc1 on the gfx9-4-generic/gfx942/gfx950 runs, scope:SCOPE_SYS on the gfx1250 and gfx12-generic runs, and no modifier on the other runs
+  ret void
+}
+
+;;==============================================================================
+;; Signed offset addressing modes (derived from global-saddr-store.ll) {
+;;==============================================================================
+
+define void @global_store_i8_zext_vgpr(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep0, <4 x i32> %data, metadata !0)
+  ret void
+}
+
+;; Store to %sbase + zext(%voffset) - 128: the 32-bit %voffset argument is
+;; zero-extended to 64 bits and a constant -128 byte displacement is applied
+;; on top. Every check prefix below expects the -128 to show up as the store's
+;; immediate "offset:-128" operand.
+;; NOTE(review): metadata !1 is defined outside this function (not visible
+;; here). The expected output carries sc0 on the GFX9-4-GENERIC, GFX942 and
+;; GFX950 prefixes and scope:SCOPE_SE on the GFX12-GENERIC prefixes -- confirm
+;; which scope it names against the metadata definitions.
+define void @global_store_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) %sbase, i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v7, v6
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v6, v5
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX90a-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-128
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v7, v6
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v6, v5
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-128 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, v6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, v5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-128 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[3:6], off offset:-128
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-128
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[3:6], off offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v10, v5
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v11, v6
+; GFX90a-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off offset:-128
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off offset:-128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v10, v5
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v11, v6
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off offset:-128 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v10, v5
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v11, v6
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off offset:-128 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[3:6], off offset:-128
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
+; GFX1250-ISEL-NEXT:    v_dual_mov_b32 v10, v5 :: v_dual_mov_b32 v11, v6
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[8:11], off offset:-128
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[3:6], off offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  ; Constant -128 byte displacement applied after the zero-extended offset.
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !1)
+  ret void
+}
+
+;; Maximum positive offset on gfx10
+define void @global_store_i8_zext_vgpr_offset_2047(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:2047
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:2047
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !2)
+  ret void
+}
+
+;; Maximum negative offset on gfx10 (-2048); every check block below expects it folded into the store's immediate offset field.
+define void @global_store_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-2048
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-2048
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64 ; zero-extend the loaded 32-bit offset to 64 bits
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048 ; constant byte offset under test
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !3) ; !3 is defined outside this excerpt; the checks above show sc0 sc1 on the gfx9-4/gfx942/gfx950 blocks and scope:SCOPE_SYS on the gfx12/gfx1250 blocks
+  ret void
+}
+;;==============================================================================
+;; } end signed offset addressing modes
+;;==============================================================================
+
+;;==============================================================================
+;; Various saddr addressing modes (derived from global-saddr-load.ll) {
+;;==============================================================================
+
+define void @global_store_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1]
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1]
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1]
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1]
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep0, <4 x i32> %data, metadata !0)
+  ret void
+}
+
+;; Stores %data through llvm.amdgcn.global.store.b128 to
+;; %sbase + zext(%voffset) - 128, tagged with metadata !1 (defined outside this
+;; function). Every check block below expects the saddr form (SGPR-pair base,
+;; VGPR offset in v0) with the -128 folded into the immediate `offset:` field.
+;; !1 shows up as `sc0` in the GFX9-4-GENERIC/GFX942/GFX950 checks and as
+;; `scope:SCOPE_SE` in the GFX12-GENERIC checks; the remaining check blocks
+;; show no cache/scope modifier on the store.
+;; The GFX90a, GFX9-4-GENERIC, GFX942, GFX950 and GFX1250 checks first copy
+;; %data out of v[1:4] into v[2:5] (-SDAG prefixes) or v[6:9] (-ISEL prefixes)
+;; -- presumably because those targets need an even-aligned VGPR tuple for the
+;; 128-bit data operand; TODO confirm against the subtarget definition.
+define void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v3, v2
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v2, v1
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-128
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, v2
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, v1
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-128 sc0
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, v3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, v2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, v1
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-128 sc0
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:-128
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-128
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v6, v1
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v7, v2
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v0, v[6:9], s[16:17] offset:-128
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v6, v1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:-128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v6, v1
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v7, v2
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:-128 sc0
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v6, v1
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v7, v2
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v8, v3
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v9, v4
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:-128 sc0
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:-128
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
+; GFX1250-ISEL-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
+; GFX1250-ISEL-NEXT:    global_store_b128 v0, v[6:9], s[0:1] offset:-128
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !1)
+  ret void
+}
+
+;; Maximum positive offset on gfx10
+;; The 32-bit offset is loaded from %voffset.ptr, zero-extended, and added to
+;; the inreg base %sbase; a further constant 2047 is then added and %data is
+;; stored there through llvm.amdgcn.global.store.b128 with metadata !2
+;; (defined outside this function). Every check block below expects the saddr
+;; form (SGPR-pair base, VGPR offset in v0) with `offset:2047` as the immediate.
+;; !2 shows up as `sc1` in the GFX9-4-GENERIC/GFX942/GFX950 checks and as
+;; `scope:SCOPE_DEV` in the GFX1250/GFX12-GENERIC checks; the remaining check
+;; blocks show no cache/scope modifier on the store.
+define void @global_store_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:2047
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:2047
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !2)
+  ret void
+}
+
+;; Maximum negative offset on gfx10
+define void @global_store_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX906-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX908-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX90a-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-1-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX1012-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-3-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-2048
+; GFX11-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX906-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX908-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX90a-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX942-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX950-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-1-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX1012-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-3-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-2048
+; GFX11-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT:    s_setpc_b64 s[30:31]
+  %voffset = load i32, ptr addrspace(1) %voffset.ptr
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !3)
+  ret void
+}
+
+;;------------------------------------------------------------------------------
+;; Uniformity edge cases
+;;------------------------------------------------------------------------------
+
+;; addrspace(3) slot holding a global (addrspace(1)) pointer; it is read by
+;; @global_store_saddr_uniform_ptr_in_vgprs to obtain its base pointer.
+@ptr.in.lds = internal addrspace(3) global ptr addrspace(1) poison
+
+;; Base pointer is uniform, but also in VGPRs.
+;; The base pointer is loaded from LDS (ds_read_b64 / ds_load_b64 in the checks
+;; below), so it lands in VGPRs. The SDAG checks show it being copied to SGPRs
+;; with v_readfirstlane_b32 and the store using an SGPR base plus a VGPR offset;
+;; the ISEL checks instead show a 64-bit VALU add and a store with a VGPR
+;; address and "off" as the scalar base.
+define amdgpu_kernel void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX9-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX9-GENERIC-SDAG-NEXT:    s_nop 4
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX9-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX906-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX906-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX906-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX906-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX906-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX906-SDAG-NEXT:    s_nop 4
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX906-SDAG-NEXT:    s_endpgm
+;
+; GFX908-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX908-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX908-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX908-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX908-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX908-SDAG-NEXT:    s_nop 4
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX908-SDAG-NEXT:    s_endpgm
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX90a-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX90a-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX90a-SDAG-NEXT:    v_pk_mov_b32 v[6:7], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-SDAG-NEXT:    v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX90a-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX90a-SDAG-NEXT:    s_nop 4
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX90a-SDAG-NEXT:    s_endpgm
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 4
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX942-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[2:3]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
+; GFX942-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX942-SDAG-NEXT:    s_nop 4
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX942-SDAG-NEXT:    s_endpgm
+;
+; GFX950-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[2:3]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
+; GFX950-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX950-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX950-SDAG-NEXT:    s_nop 4
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX10-1-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    s_clause 0x1
+; GFX1012-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX1012-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX1012-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1012-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX1012-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX1012-SDAG-NEXT:    s_endpgm
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX10-3-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX11-GENERIC-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    ds_load_b64 v[4:5], v0
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX11-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX11-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX1250-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v6, s6
+; GFX1250-SDAG-NEXT:    ds_load_b64 v[4:5], v0
+; GFX1250-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-SDAG-NEXT:    s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX1250-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX1250-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1]
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX12-GENERIC-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    ds_load_b64 v[4:5], v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX12-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX12-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1]
+; GFX12-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX9-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX906-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX906-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX906-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX906-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX906-ISEL-NEXT:    s_endpgm
+;
+; GFX908-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX908-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX908-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX908-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX908-ISEL-NEXT:    s_endpgm
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX90a-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX90a-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX90a-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90a-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX90a-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX90a-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX90a-ISEL-NEXT:    s_endpgm
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX9-4-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX942-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX942-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX942-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX942-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX942-ISEL-NEXT:    s_endpgm
+;
+; GFX950-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX950-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX950-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX950-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX950-ISEL-NEXT:    s_endpgm
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX10-1-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-1-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    s_clause 0x1
+; GFX1012-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX1012-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX1012-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX1012-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX1012-ISEL-NEXT:    s_endpgm
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX10-3-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-3-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX11-GENERIC-ISEL-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-ISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX11-GENERIC-ISEL-NEXT:    ds_load_b64 v[0:1], v0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[4:5], v[0:3], off
+; GFX11-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-ISEL-NEXT:    s_clause 0x1
+; GFX1250-ISEL-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX1250-ISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX1250-ISEL-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX1250-ISEL-NEXT:    s_wait_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-ISEL-NEXT:    global_store_b128 v[4:5], v[0:3], off
+; GFX1250-ISEL-NEXT:    s_endpgm
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX12-GENERIC-ISEL-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-ISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX12-GENERIC-ISEL-NEXT:    ds_load_b64 v[0:1], v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[4:5], v[0:3], off
+; GFX12-GENERIC-ISEL-NEXT:    s_endpgm
+  ; The base pointer is read from the LDS global instead of being passed as a
+  ; kernel argument.
+  %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
+  ; The 32-bit kernel-argument offset is zero-extended and added as a byte
+  ; offset to the loaded base.
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  ; NOTE(review): metadata !0 is defined outside this view; the checks above
+  ; show no scope or cache modifiers on the resulting store.
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep0, <4 x i32> %data, metadata !0)
+  ret void
+}
+
+;; Base pointer is uniform but held in VGPRs (loaded from LDS), with an immediate offset of -120
+define amdgpu_kernel void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-GENERIC-SDAG:       ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX9-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX9-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX9-GENERIC-SDAG-NEXT:    s_nop 4
+; GFX9-GENERIC-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX9-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX906-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX906-SDAG:       ; %bb.0:
+; GFX906-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX906-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX906-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX906-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX906-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX906-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX906-SDAG-NEXT:    s_nop 4
+; GFX906-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX906-SDAG-NEXT:    s_endpgm
+;
+; GFX908-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX908-SDAG:       ; %bb.0:
+; GFX908-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX908-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX908-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX908-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX908-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX908-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX908-SDAG-NEXT:    s_nop 4
+; GFX908-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX908-SDAG-NEXT:    s_endpgm
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX90a-SDAG:       ; %bb.0:
+; GFX90a-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX90a-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX90a-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90a-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX90a-SDAG-NEXT:    v_pk_mov_b32 v[6:7], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-SDAG-NEXT:    v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX90a-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX90a-SDAG-NEXT:    s_nop 4
+; GFX90a-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120
+; GFX90a-SDAG-NEXT:    s_endpgm
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-4-GENERIC-SDAG:       ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-4-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX9-4-GENERIC-SDAG-NEXT:    s_nop 4
+; GFX9-4-GENERIC-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120 sc0
+; GFX9-4-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX942-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX942-SDAG:       ; %bb.0:
+; GFX942-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[2:3]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
+; GFX942-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX942-SDAG-NEXT:    s_nop 4
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120 sc0
+; GFX942-SDAG-NEXT:    s_endpgm
+;
+; GFX950-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT:    ds_read_b64 v[0:1], v0
+; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s6
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[2:3]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
+; GFX950-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX950-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX950-SDAG-NEXT:    s_nop 4
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120 sc0
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-1-GENERIC-SDAG:       ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX10-1-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX10-1-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX10-1-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX10-1-GENERIC-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX10-1-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1012-SDAG:       ; %bb.0:
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT:    s_clause 0x1
+; GFX1012-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX1012-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX1012-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1012-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1012-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX1012-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX1012-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX1012-SDAG-NEXT:    s_endpgm
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-3-GENERIC-SDAG:       ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX10-3-GENERIC-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-SDAG-NEXT:    ds_read_b64 v[4:5], v0
+; GFX10-3-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX10-3-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX10-3-GENERIC-SDAG-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX10-3-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX11-GENERIC-SDAG:       ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX11-GENERIC-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT:    ds_load_b64 v[4:5], v0
+; GFX11-GENERIC-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX11-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX11-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX11-GENERIC-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1] offset:-120
+; GFX11-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX1250-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v6, s6
+; GFX1250-SDAG-NEXT:    ds_load_b64 v[4:5], v0
+; GFX1250-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-SDAG-NEXT:    s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX1250-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX1250-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1] offset:-120
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX12-GENERIC-SDAG:       ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT:    s_clause 0x1
+; GFX12-GENERIC-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT:    ds_load_b64 v[4:5], v0
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_mov_b32_e32 v6, s6
+; GFX12-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-SDAG-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-SDAG-NEXT:    s_wait_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
+; GFX12-GENERIC-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
+; GFX12-GENERIC-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1] offset:-120 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-GENERIC-ISEL:       ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX9-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX9-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX9-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX906-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX906-ISEL:       ; %bb.0:
+; GFX906-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX906-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX906-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX906-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX906-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX906-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX906-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX906-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX906-ISEL-NEXT:    s_endpgm
+;
+; GFX908-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX908-ISEL:       ; %bb.0:
+; GFX908-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX908-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX908-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX908-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX908-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX908-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX908-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX908-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX908-ISEL-NEXT:    s_endpgm
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX90a-ISEL:       ; %bb.0:
+; GFX90a-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX90a-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX90a-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX90a-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90a-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90a-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX90a-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX90a-ISEL-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-ISEL-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX90a-ISEL-NEXT:    s_endpgm
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-4-GENERIC-ISEL:       ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT:    s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX9-4-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120 sc0
+; GFX9-4-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX942-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX942-ISEL:       ; %bb.0:
+; GFX942-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX942-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX942-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX942-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX942-ISEL-NEXT:    s_nop 1
+; GFX942-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120 sc0
+; GFX942-ISEL-NEXT:    s_endpgm
+;
+; GFX950-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX950-ISEL:       ; %bb.0:
+; GFX950-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX950-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX950-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX950-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX950-ISEL-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX950-ISEL-NEXT:    s_nop 1
+; GFX950-ISEL-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX950-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120 sc0
+; GFX950-ISEL-NEXT:    s_endpgm
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-1-GENERIC-ISEL:       ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX10-1-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX10-1-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX10-1-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX10-1-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1012-ISEL:       ; %bb.0:
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT:    s_clause 0x1
+; GFX1012-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX1012-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX1012-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX1012-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX1012-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1012-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1012-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX1012-ISEL-NEXT:    s_endpgm
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-3-GENERIC-ISEL:       ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX10-3-GENERIC-ISEL-NEXT:    s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-ISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX10-3-GENERIC-ISEL-NEXT:    ds_read_b64 v[0:1], v0
+; GFX10-3-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-ISEL-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX10-3-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX11-GENERIC-ISEL:       ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX11-GENERIC-ISEL-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-ISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX11-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX11-GENERIC-ISEL-NEXT:    ds_load_b64 v[0:1], v0
+; GFX11-GENERIC-ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-GENERIC-ISEL-NEXT:    global_store_b128 v[4:5], v[0:3], off offset:-120
+; GFX11-GENERIC-ISEL-NEXT:    s_endpgm
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1250-ISEL:       ; %bb.0:
+; GFX1250-ISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-ISEL-NEXT:    s_clause 0x1
+; GFX1250-ISEL-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX1250-ISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX1250-ISEL-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX1250-ISEL-NEXT:    s_wait_dscnt 0x0
+; GFX1250-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-ISEL-NEXT:    global_store_b128 v[4:5], v[0:3], off offset:-120
+; GFX1250-ISEL-NEXT:    s_endpgm
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX12-GENERIC-ISEL:       ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT:    s_clause 0x1
+; GFX12-GENERIC-ISEL-NEXT:    s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-ISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-ISEL-NEXT:    s_mov_b32 s7, 0
+; GFX12-GENERIC-ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX12-GENERIC-ISEL-NEXT:    ds_load_b64 v[0:1], v0
+; GFX12-GENERIC-ISEL-NEXT:    s_wait_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-ISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX12-GENERIC-ISEL-NEXT:    global_store_b128 v[4:5], v[0:3], off offset:-120 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT:    s_endpgm
+  %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
+  %zext.offset = zext i32 %voffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -120
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !1)
+  ret void
+}
+
+;;==============================================================================
+;; } End saddr addressing modes
+;;==============================================================================
+
+
+!0 = !{!"wavefront"}
+!1 = !{!"workgroup"}
+!2 = !{!"agent"}
+!3 = !{!""}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX: {{.*}}
+; GFX-ISEL: {{.*}}
+; GFX-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll b/llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll
new file mode 100644
index 0000000000000..af4b9dc1d98d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll
@@ -0,0 +1,36 @@
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx602          < %s 2>&1 | FileCheck -check-prefixes=GFX602          %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx705          < %s 2>&1 | FileCheck -check-prefixes=GFX705          %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx810          < %s 2>&1 | FileCheck -check-prefixes=GFX810          %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic    < %s 2>&1 | FileCheck -check-prefixes=GFX9-GENERIC    %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s 2>&1 | FileCheck -check-prefixes=GFX9-4-GENERIC  %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-1-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-3-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX11-GENERIC   %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX12-GENERIC   %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250         < %s 2>&1 | FileCheck -check-prefixes=GFX1250         %s
+; NOTE(review): the "xxx" lines are disabled run lines; presumably those targets can select the intrinsic, so "not llc" would fail there - confirm.
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx602          < %s 2>&1 | FileCheck -check-prefixes=GFX602-GBL-ISEL          %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx705          < %s 2>&1 | FileCheck -check-prefixes=GFX705-GBL-ISEL          %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx810          < %s 2>&1 | FileCheck -check-prefixes=GFX810-GBL-ISEL          %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic    < %s 2>&1 | FileCheck -check-prefixes=GFX9-GENERIC-GBL-ISEL    %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s 2>&1 | FileCheck -check-prefixes=GFX9-4-GENERIC-GBL-ISEL  %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-1-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-3-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX11-GENERIC-GBL-ISEL   %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX12-GENERIC-GBL-ISEL   %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250         < %s 2>&1 | FileCheck -check-prefixes=GFX1250-GBL-ISEL         %s
+
+define <4 x i32> @global_load_b128(ptr addrspace(1) %addr) {
+; GFX602:          LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.b128
+; GFX705:          LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.b128
+; GFX810:          LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.b128
+; The -global-isel=1 checks below expect a differently worded selection failure than the -global-isel=0 checks above.
+; GFX602-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.load.b128)
+; GFX705-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.load.b128)
+; GFX810-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.load.b128)
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0)
+  ret <4 x i32> %data
+}
+
+!0 = !{!""}
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll b/llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll
new file mode 100644
index 0000000000000..a24c17f0e9905
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll
@@ -0,0 +1,36 @@
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx602          < %s 2>&1 | FileCheck -check-prefixes=GFX602          %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx705          < %s 2>&1 | FileCheck -check-prefixes=GFX705          %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx810          < %s 2>&1 | FileCheck -check-prefixes=GFX810          %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic    < %s 2>&1 | FileCheck -check-prefixes=GFX9-GENERIC    %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s 2>&1 | FileCheck -check-prefixes=GFX9-4-GENERIC  %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-1-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-3-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX11-GENERIC   %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX12-GENERIC   %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250         < %s 2>&1 | FileCheck -check-prefixes=GFX1250         %s
+
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx602          < %s 2>&1 | FileCheck -check-prefixes=GFX602-GBL-ISEL          %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx705          < %s 2>&1 | FileCheck -check-prefixes=GFX705-GBL-ISEL          %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx810          < %s 2>&1 | FileCheck -check-prefixes=GFX810-GBL-ISEL          %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic    < %s 2>&1 | FileCheck -check-prefixes=GFX9-GENERIC-GBL-ISEL    %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic  < %s 2>&1 | FileCheck -check-prefixes=GFX9-4-GENERIC-GBL-ISEL  %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-1-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-3-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX11-GENERIC-GBL-ISEL   %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic   < %s 2>&1 | FileCheck -check-prefixes=GFX12-GENERIC-GBL-ISEL   %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250         < %s 2>&1 | FileCheck -check-prefixes=GFX1250-GBL-ISEL         %s
+
+define void @global_store_b128(ptr addrspace(1) %addr, <4 x i32> %data) { ; negative test: llc is expected to fail on this call
+; GFX602:          LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.store.b128
+; GFX705:          LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.store.b128
+; GFX810:          LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.store.b128
+
+; GFX602-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.store.b128)
+; GFX705-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.store.b128)
+; GFX810-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.store.b128)
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !0) ; !0 is a tuple holding one empty string
+  ret void
+}
+
+!0 = !{!""}
diff --git a/llvm/test/Verifier/amdgpu-intrinsics.ll b/llvm/test/Verifier/amdgpu-intrinsics.ll
new file mode 100644
index 0000000000000..b774c4cb12fbd
--- /dev/null
+++ b/llvm/test/Verifier/amdgpu-intrinsics.ll
@@ -0,0 +1,66 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; ---------- i32 metadata ------------------------------------------------------
+; CHECK: global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call <4 x i32> @llvm.amdgcn.global.load.b128({{.*}})
+; CHECK-NEXT: metadata i32 1
+define <4 x i32> @global_load_b128_00(ptr addrspace(1) %addr) {
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3) ; !3 is !{i32 1}: an integer operand, not a string
+  ret <4 x i32> %data
+}
+
+; CHECK: global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call void @llvm.amdgcn.global.store.b128({{.*}})
+; CHECK-NEXT: metadata i32 1
+define void @global_store_b128_00(ptr addrspace(1) %addr, <4 x i32> %data) {
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !3) ; !3 is !{i32 1}: an integer operand, not a string
+  ret void
+}
+
+; ---------- nested tuple (operand is not a string) ----------------------------
+; CHECK:      global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call <4 x i32> @llvm.amdgcn.global.load.b128({{.*}})
+; CHECK-NEXT: metadata !0
+define <4 x i32> @global_load_b128_01(ptr addrspace(1) %addr) {
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0) ; !0 is !{!1}: its operand is another tuple, not a string
+  ret <4 x i32> %data
+}
+
+; CHECK:      global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call void @llvm.amdgcn.global.store.b128({{.*}})
+; CHECK-NEXT: metadata !0
+define void @global_store_b128_01(ptr addrspace(1) %addr, <4 x i32> %data) {
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !0) ; !0 is !{!1}: its operand is another tuple, not a string
+  ret void
+}
+
+; ---------- invalid string metadata -------------------------------------------
+; CHECK:      'wave' is not a valid scope for global load/store intrinsics
+; CHECK-NEXT: call <4 x i32> @llvm.amdgcn.global.load.b128({{.*}})
+; CHECK-NEXT: metadata !2
+define <4 x i32> @global_load_b128_02(ptr addrspace(1) %addr) {
+entry:
+  %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2) ; !2 is !{!"wave"}: a string, but one the diagnostic above rejects
+  ret <4 x i32> %data
+}
+
+; CHECK:      'wave' is not a valid scope for global load/store intrinsics
+; CHECK-NEXT: call void @llvm.amdgcn.global.store.b128({{.*}})
+; CHECK-NEXT: metadata !2
+define void @global_store_b128_02(ptr addrspace(1) %addr, <4 x i32> %data) {
+entry:
+  call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !2) ; !2 is !{!"wave"}: a string, but one the diagnostic above rejects
+  ret void
+}
+
+
+!0 = !{!1}
+!1 = !{!""}
+
+!2 = !{!"wave"}
+
+!3 = !{i32 1}
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 8540faed34e5d..df326374deb46 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1472,6 +1472,12 @@ Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
     return Error::success();
   }
 
+  llvm::MVT::SimpleValueType STy = VTy.getMachineValueType().SimpleTy;
+  if (STy == MVT::Metadata) {
+    addPredicate<MachineOperandTypeMatcher>(MachineOperand::MO_Metadata);
+    return Error::success();
+  }
+
   auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy);
   if (!OpTyOrNone)
     return failUnsupported("unsupported type");
@@ -1937,6 +1943,17 @@ bool InstructionOperandMatcher::isHigherPriorityThan(
   return false;
 }
 
+//===- MachineOperandTypeMatcher -----------------------------------------===//
+
+void MachineOperandTypeMatcher::emitPredicateOpcodes(MatchTable &Table,
+                                                     RuleMatcher &Rule) const {
+  Table << MatchTable::Opcode("GIM_CheckMachineOperandType"); // opcode first
+  Table << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID);
+  Table << MatchTable::Comment("Op") << MatchTable::ULEB128Value(OpIdx);
+  Table << MatchTable::Comment("Ty") << MatchTable::ULEB128Value(MOTy);
+  Table << MatchTable::LineBreak; // ends this predicate's row
+}
+
 //===- OperandRenderer ----------------------------------------------------===//
 
 OperandRenderer::~OperandRenderer() = default;
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
index 6a8017894a486..d770f6ec653b1 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
@@ -23,6 +23,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -837,6 +838,7 @@ class PredicateMatcher {
     OPM_MBB,
     OPM_RecordNamedOperand,
     OPM_RecordRegType,
+    OPM_MOType,
   };
 
 protected:
@@ -1926,6 +1928,32 @@ class InstructionOperandMatcher : public OperandPredicateMatcher {
   }
 };
 
+/// Operand predicate that checks a MachineOperand's operand type (for
+/// example MachineOperand::MO_Metadata).
+class MachineOperandTypeMatcher : public OperandPredicateMatcher {
+  const MachineOperand::MachineOperandType MOTy;
+
+public:
+  MachineOperandTypeMatcher(unsigned InsnVarID, unsigned OpIdx,
+                            MachineOperand::MachineOperandType MOTy)
+      : OperandPredicateMatcher(OPM_MOType, InsnVarID, OpIdx), MOTy(MOTy) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == OPM_MOType;
+  }
+
+  /// Two matchers are only interchangeable when they also expect the same
+  /// operand type; classof() guards the downcast.
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return OperandPredicateMatcher::isIdentical(B) && classof(&B) &&
+           MOTy == static_cast<const MachineOperandTypeMatcher &>(B).MOTy;
+  }
+
+  /// Emits the GIM_CheckMachineOperandType check for this operand.
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override;
+};
+
 //===- Actions ------------------------------------------------------------===//
 class OperandRenderer {
 public:



More information about the llvm-commits mailing list